diff -Nru mongodb-1.4.4/bson/bson_db.h mongodb-1.6.3/bson/bson_db.h
--- mongodb-1.4.4/bson/bson_db.h	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bson_db.h	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,70 @@
+/** @file bson_db.h
+
+    This file contains the implementation of BSON-related methods that are required
+    by the MongoDB database server.
+
+    Normally, for standalone BSON usage, you do not want this file - it will tend to
+    pull in some other files from the MongoDB project. Thus, bson.h (the main file
+    one would use) does not include this file.
+*/
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "../util/optime.h"
+
+namespace mongo {
+
+    /**
+       Timestamps are a special BSON datatype that is used internally for replication.
+       Append a timestamp element to the object being built.
+       @param time - in milliseconds (but stored with one-second resolution)
+    */
+    inline BSONObjBuilder& BSONObjBuilder::appendTimestamp( const StringData& fieldName , unsigned long long time , unsigned int inc ){
+        OpTime t( (unsigned) (time / 1000) , inc );
+        appendTimestamp( fieldName , t.asDate() );
+        return *this;
+    }
+
+    inline OpTime BSONElement::_opTime() const {
+        if( type() == mongo::Date || type() == Timestamp )
+            return OpTime( *reinterpret_cast< const unsigned long long* >( value() ) );
+        return OpTime();
+    }
+
+    inline string BSONElement::_asCode() const {
+        switch( type() ){
+        case mongo::String:
+        case Code:
+            return string(valuestr(), valuestrsize()-1);
+        case CodeWScope:
+            return string(codeWScopeCode(), *(int*)(valuestr())-1);
+        default:
+            log() << "can't convert type: " << (int)(type()) << " to code" << endl;
+        }
+        uassert( 10062 , "not code" , 0 );
+        return "";
+    }
+
+    inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(DateNowLabeler& id){
+        _builder->appendDate(_fieldName, jsTime());
+        _fieldName = 0;
+        return *_builder;
+    }
+
+}
diff -Nru mongodb-1.4.4/bson/bsondemo/bsondemo.cpp mongodb-1.6.3/bson/bsondemo/bsondemo.cpp
--- mongodb-1.4.4/bson/bsondemo/bsondemo.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsondemo/bsondemo.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,107 @@
+/** @file bsondemo.cpp
+
+    Example of use of BSON from C++.
+
+    Requires boost (headers only).
+    Works headers-only (for the parts actually exercised herein, that is - some functions require .cpp files).
+*/
+
+/*
+ * Copyright 2010 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../bson.h"
+#include <iostream>
+#include <vector>
+
+using namespace std;
+using namespace bson;
+
+void iter(bo o) {
+    /* iterator example */
+    cout << "\niter()\n";
+    for( bo::iterator i(o); i.more(); ) {
+        cout << ' ' << i.next().toString() << '\n';
+    }
+}
+
+int main()
+{
+    cout << "build bits: " << 8 * sizeof(char *) << '\n' << endl;
+
+    /* a bson object defaults on construction to { } */
+    bo empty;
+    cout << "empty: " << empty << endl;
+
+    /* make a simple { name : 'joe', age : 33.7 } object */
+    {
+        bob b;
+        b.append("name", "joe");
+        b.append("age", 33.7);
+        b.obj();
+    }
+
+    /* make { name : 'joe', age : 33.7 } with a more compact notation. */
+    bo x = bob().append("name", "joe").append("age", 33.7).obj();
+
+    /* convert from bson to json */
+    string json = x.toString();
+    cout << "json for x:" << json << endl;
+
+    /* access some fields of bson object x */
+    cout << "Some x things: " << x["name"] << ' ' << x["age"].Number() << ' ' << x.isEmpty() << endl;
+
+    /* make a bit more complex object with some nesting
+       { x : 'asdf', y : true, subobj : { z : 3, q : 4 } }
+    */
+    bo y = BSON( "x" << "asdf" << "y" << true << "subobj" << BSON( "z" << 3 << "q" << 4 ) );
+
+    /* print it */
+    cout << "y: " << y << endl;
+
+    /* reach in and get subobj.z */
+    cout << "subobj.z: " << y.getFieldDotted("subobj.z").Number() << endl;
+
+    /* alternate syntax: */
+    cout << "subobj.z: " << y["subobj"]["z"].Number() << endl;
+
+    /* fetch all *top level* elements from object y into a vector */
+    vector<be> v;
+    y.elems(v);
+    cout << v[0] << endl;
+
+    /* into an array */
+    list<be> L;
+    y.elems(L);
+
+    bo sub = y["subobj"].Obj();
+
+    /* grab all the ints that were in subobj. if it had elements that were not ints, we throw an exception
+       (capital V on Vals() means exception if wrong type found)
+    */
+    vector<int> myints;
+    sub.Vals(myints);
+    cout << "my ints: " << myints[0] << ' ' << myints[1] << endl;
+
+    /* grab all the string values from x. if the field isn't of string type, just skip it --
+       lowercase v on vals() indicates skip, don't throw.
+ */
+    vector<string> strs;
+    x.vals(strs);
+    cout << strs.size() << " strings, first one: " << strs[0] << endl;
+
+    iter(y);
+    return 0;
+}
diff -Nru mongodb-1.4.4/bson/bsondemo/bsondemo.vcproj mongodb-1.6.3/bson/bsondemo/bsondemo.vcproj
--- mongodb-1.4.4/bson/bsondemo/bsondemo.vcproj	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsondemo/bsondemo.vcproj	2010-09-24 10:02:42.000000000 -0700
[243-line Visual Studio 2008 project file for bsondemo; XML content lost in extraction and not reproduced here.]
diff -Nru mongodb-1.4.4/bson/bsondemo/bsondemo.vcxproj mongodb-1.6.3/bson/bsondemo/bsondemo.vcxproj
--- mongodb-1.4.4/bson/bsondemo/bsondemo.vcxproj	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsondemo/bsondemo.vcxproj	2010-09-24 10:02:42.000000000 -0700
[193-line Visual Studio 2010 project file for bsondemo (Debug/Release, Win32/x64 configurations); XML content lost in extraction and not reproduced here.]
diff -Nru mongodb-1.4.4/bson/bsondemo/bsondemo.vcxproj.filters mongodb-1.6.3/bson/bsondemo/bsondemo.vcxproj.filters
--- mongodb-1.4.4/bson/bsondemo/bsondemo.vcxproj.filters	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsondemo/bsondemo.vcxproj.filters	2010-09-24 10:02:42.000000000 -0700
[52-line Solution Explorer filter file grouping the bson headers under a "bson" filter; XML content lost in extraction and not reproduced here.]
diff -Nru mongodb-1.4.4/bson/bsonelement.h mongodb-1.6.3/bson/bsonelement.h
--- mongodb-1.4.4/bson/bsonelement.h	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsonelement.h	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,549 @@
+// BSONElement
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include "util/builder.h" + +namespace bson { + typedef mongo::BSONElement be; + typedef mongo::BSONObj bo; + typedef mongo::BSONObjBuilder bob; +} + +namespace mongo { + + class OpTime; + class BSONElement; + + /* l and r MUST have same type when called: check that first. */ + int compareElementValues(const BSONElement& l, const BSONElement& r); + + +/** BSONElement represents an "element" in a BSONObj. So for the object { a : 3, b : "abc" }, + 'a : 3' is the first element (key+value). + + The BSONElement object points into the BSONObj's data. Thus the BSONObj must stay in scope + for the life of the BSONElement. + + internals: + + -------- size() ------------ + -fieldNameSize- + value() + type() +*/ +class BSONElement { +public: + /** These functions, which start with a capital letter, throw a UserException if the + element is not of the required type. Example: + + string foo = obj["foo"].String(); // exception if not a string type or DNE + */ + string String() const { return chk(mongo::String).valuestr(); } + Date_t Date() const { return chk(mongo::Date).date(); } + double Number() const { return chk(isNumber()).number(); } + double Double() const { return chk(NumberDouble)._numberDouble(); } + long long Long() const { return chk(NumberLong)._numberLong(); } + int Int() const { return chk(NumberInt)._numberInt(); } + bool Bool() const { return chk(mongo::Bool).boolean(); } + BSONObj Obj() const; + vector Array() const; // see implementation for detailed comments + mongo::OID OID() const { return chk(jstOID).__oid(); } + void Null() const { chk(isNull()); } + void OK() const { chk(ok()); } + + /** populate v with the value of the element. If type does not match, throw exception. + useful in templates -- see also BSONObj::Vals(). + */ + void Val(Date_t& v) const { v = Date(); } + void Val(long long& v) const { v = Long(); } + void Val(bool& v) const { v = Bool(); } + void Val(BSONObj& v) const; + void Val(mongo::OID& v) const { v = OID(); } + void Val(int& v) const { v = Int(); } + void Val(double& v) const { v = Double(); } + void Val(string& v) const { v = String(); } + + /** Use ok() to check if a value is assigned: + if( myObj["foo"].ok() ) ... + */ + bool ok() const { return !eoo(); } + + string toString( bool includeFieldName = true, bool full=false) const; + void toString(StringBuilder& s, bool includeFieldName = true, bool full=false) const; + string jsonString( JsonStringFormat format, bool includeFieldNames = true, int pretty = 0 ) const; + operator string() const { return toString(); } + + /** Returns the type of the element */ + BSONType type() const { return (BSONType) *data; } + + /** retrieve a field within this element + throws exception if *this is not an embedded object + */ + BSONElement operator[] (const string& field) const; + + /** returns the tyoe of the element fixed for the main type + the main purpose is numbers. 
any numeric type will return NumberDouble + Note: if the order changes, indexes have to be re-built or than can be corruption + */ + int canonicalType() const; + + /** Indicates if it is the end-of-object element, which is present at the end of + every BSON object. + */ + bool eoo() const { return type() == EOO; } + + /** Size of the element. + @param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size. + */ + int size( int maxLen = -1 ) const; + + /** Wrap this element up as a singleton object. */ + BSONObj wrap() const; + + /** Wrap this element up as a singleton object with a new name. */ + BSONObj wrap( const char* newName) const; + + /** field name of the element. e.g., for + name : "Joe" + "name" is the fieldname + */ + const char * fieldName() const { + if ( eoo() ) return ""; // no fieldname for it. + return data + 1; + } + + /** raw data of the element's value (so be careful). */ + const char * value() const { + return (data + fieldNameSize() + 1); + } + /** size in bytes of the element's value (when applicable). */ + int valuesize() const { + return size() - fieldNameSize() - 1; + } + + bool isBoolean() const { return type() == mongo::Bool; } + + /** @return value of a boolean element. + You must assure element is a boolean before + calling. */ + bool boolean() const { + return *value() ? true : false; + } + + /** Retrieve a java style date value from the element. + Ensure element is of type Date before calling. + */ + Date_t date() const { + return *reinterpret_cast< const Date_t* >( value() ); + } + + /** Convert the value to boolean, regardless of its type, in a javascript-like fashion + (i.e., treat zero and null as false). + */ + bool trueValue() const; + + /** True if number, string, bool, date, OID */ + bool isSimpleType() const; + + /** True if element is of a numeric type. */ + bool isNumber() const; + + /** Return double value for this field. MUST be NumberDouble type. */ + double _numberDouble() const {return *reinterpret_cast< const double* >( value() ); } + /** Return double value for this field. MUST be NumberInt type. */ + int _numberInt() const {return *reinterpret_cast< const int* >( value() ); } + /** Return double value for this field. MUST be NumberLong type. */ + long long _numberLong() const {return *reinterpret_cast< const long long* >( value() ); } + + /** Retrieve int value for the element safely. Zero returned if not a number. */ + int numberInt() const; + /** Retrieve long value for the element safely. Zero returned if not a number. */ + long long numberLong() const; + /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. + Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. + */ + double numberDouble() const; + /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. + Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. + */ + double number() const { return numberDouble(); } + + /** Retrieve the object ID stored in the object. + You must ensure the element is of type jstOID first. */ + const mongo::OID &__oid() const { return *reinterpret_cast< const mongo::OID* >( value() ); } + + /** True if element is null. */ + bool isNull() const { + return type() == jstNULL; + } + + /** Size (length) of a string element. + You must assure of type String first. 
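+       (The stored count includes the terminating null, matching the BSON wire format.)
+       A small illustration, assuming obj holds { name : "Joe" }:
+
+           BSONElement e = obj["name"];
+           int len = e.valuestrsize();   // == 4 : three characters plus the trailing '\0'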
*/ + int valuestrsize() const { + return *reinterpret_cast< const int* >( value() ); + } + + // for objects the size *includes* the size of the size field + int objsize() const { + return *reinterpret_cast< const int* >( value() ); + } + + /** Get a string's value. Also gives you start of the real data for an embedded object. + You must assure data is of an appropriate type first -- see also valuestrsafe(). + */ + const char * valuestr() const { + return value() + 4; + } + + /** Get the string value of the element. If not a string returns "". */ + const char *valuestrsafe() const { + return type() == mongo::String ? valuestr() : ""; + } + /** Get the string value of the element. If not a string returns "". */ + string str() const { + return type() == mongo::String ? string(valuestr(), valuestrsize()-1) : string(); + } + + /** Get javascript code of a CodeWScope data element. */ + const char * codeWScopeCode() const { + return value() + 8; + } + /** Get the scope SavedContext of a CodeWScope data element. */ + const char * codeWScopeScopeData() const { + // TODO fix + return codeWScopeCode() + strlen( codeWScopeCode() ) + 1; + } + + /** Get the embedded object this element holds. */ + BSONObj embeddedObject() const; + + /* uasserts if not an object */ + BSONObj embeddedObjectUserCheck() const; + + BSONObj codeWScopeObject() const; + + /** Get raw binary data. Element must be of type BinData. Doesn't handle type 2 specially */ + const char *binData(int& len) const { + // BinData: + assert( type() == BinData ); + len = valuestrsize(); + return value() + 5; + } + /** Get binary data. Element must be of type BinData. Handles type 2 */ + const char *binDataClean(int& len) const { + // BinData: + if (binDataType() != ByteArrayDeprecated){ + return binData(len); + } else { + // Skip extra size + len = valuestrsize() - 4; + return value() + 5 + 4; + } + } + + BinDataType binDataType() const { + // BinData: + assert( type() == BinData ); + unsigned char c = (value() + 4)[0]; + return (BinDataType)c; + } + + /** Retrieve the regex string for a Regex element */ + const char *regex() const { + assert(type() == RegEx); + return value(); + } + + /** Retrieve the regex flags (options) for a Regex element */ + const char *regexFlags() const { + const char *p = regex(); + return p + strlen(p) + 1; + } + + /** like operator== but doesn't check the fieldname, + just the value. + */ + bool valuesEqual(const BSONElement& r) const { + return woCompare( r , false ) == 0; + } + + /** Returns true if elements are equal. */ + bool operator==(const BSONElement& r) const { + return woCompare( r , true ) == 0; + } + + /** Well ordered comparison. + @return <0: l0:l>r + order by type, field name, and field value. + If considerFieldName is true, pay attention to the field name. + */ + int woCompare( const BSONElement &e, bool considerFieldName = true ) const; + + const char * rawdata() const { + return data; + } + + /** 0 == Equality, just not defined yet */ + int getGtLtOp( int def = 0 ) const; + + /** Constructs an empty element */ + BSONElement(); + + /** Check that data is internally consistent. */ + void validate() const; + + /** True if this element may contain subobjects. 
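+       That is, elements of type Object, Array, or CodeWScope (see the switch below).
+       For example, given { a : { b : 1 }, c : 3 }, the element 'a' may encapsulate
+       while 'c' cannot.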
*/ + bool mayEncapsulate() const { + switch ( type() ){ + case Object: + case mongo::Array: + case CodeWScope: + return true; + default: + return false; + } + } + + /** True if this element can be a BSONObj */ + bool isABSONObj() const { + switch( type() ){ + case Object: + case mongo::Array: + return true; + default: + return false; + } + } + + Date_t timestampTime() const{ + unsigned long long t = ((unsigned int*)(value() + 4 ))[0]; + return t * 1000; + } + unsigned int timestampInc() const{ + return ((unsigned int*)(value() ))[0]; + } + + const char * dbrefNS() const { + uassert( 10063 , "not a dbref" , type() == DBRef ); + return value() + 4; + } + + const mongo::OID& dbrefOID() const { + uassert( 10064 , "not a dbref" , type() == DBRef ); + const char * start = value(); + start += 4 + *reinterpret_cast< const int* >( start ); + return *reinterpret_cast< const mongo::OID* >( start ); + } + + bool operator<( const BSONElement& other ) const { + int x = (int)canonicalType() - (int)other.canonicalType(); + if ( x < 0 ) return true; + else if ( x > 0 ) return false; + return compareElementValues(*this,other) < 0; + } + + // If maxLen is specified, don't scan more than maxLen bytes. + explicit BSONElement(const char *d, int maxLen = -1) : data(d) { + fieldNameSize_ = -1; + if ( eoo() ) + fieldNameSize_ = 0; + else { + if ( maxLen != -1 ) { + int size = (int) strnlen( fieldName(), maxLen - 1 ); + massert( 10333 , "Invalid field name", size != -1 ); + fieldNameSize_ = size + 1; + } + } + totalSize = -1; + } + + string _asCode() const; + OpTime _opTime() const; + +private: + const char *data; + mutable int fieldNameSize_; // cached value + int fieldNameSize() const { + if ( fieldNameSize_ == -1 ) + fieldNameSize_ = (int)strlen( fieldName() ) + 1; + return fieldNameSize_; + } + mutable int totalSize; /* caches the computed size */ + + friend class BSONObjIterator; + friend class BSONObj; + const BSONElement& chk(int t) const { + if ( t != type() ){ + StringBuilder ss; + ss << "wrong type for BSONElement (" << fieldName() << ") " << type() << " != " << t; + uasserted(13111, ss.str() ); + } + return *this; + } + const BSONElement& chk(bool expr) const { + uassert(13118, "unexpected or missing type value in BSON object", expr); + return *this; + } +}; + + + inline int BSONElement::canonicalType() const { + BSONType t = type(); + switch ( t ){ + case MinKey: + case MaxKey: + return t; + case EOO: + case Undefined: + return 0; + case jstNULL: + return 5; + case NumberDouble: + case NumberInt: + case NumberLong: + return 10; + case mongo::String: + case Symbol: + return 15; + case Object: + return 20; + case mongo::Array: + return 25; + case BinData: + return 30; + case jstOID: + return 35; + case mongo::Bool: + return 40; + case mongo::Date: + case Timestamp: + return 45; + case RegEx: + return 50; + case DBRef: + return 55; + case Code: + return 60; + case CodeWScope: + return 65; + default: + assert(0); + return -1; + } + } + + inline bool BSONElement::trueValue() const { + switch( type() ) { + case NumberLong: + return *reinterpret_cast< const long long* >( value() ) != 0; + case NumberDouble: + return *reinterpret_cast< const double* >( value() ) != 0; + case NumberInt: + return *reinterpret_cast< const int* >( value() ) != 0; + case mongo::Bool: + return boolean(); + case EOO: + case jstNULL: + case Undefined: + return false; + + default: + ; + } + return true; + } + + /** True if element is of a numeric type. 
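+       (NumberInt, NumberLong, or NumberDouble.) A typical guard, using the accessors
+       declared above:
+
+           if( obj["age"].isNumber() )
+               cout << obj["age"].number() << endl;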
*/ + inline bool BSONElement::isNumber() const { + switch( type() ) { + case NumberLong: + case NumberDouble: + case NumberInt: + return true; + default: + return false; + } + } + + inline bool BSONElement::isSimpleType() const { + switch( type() ){ + case NumberLong: + case NumberDouble: + case NumberInt: + case mongo::String: + case mongo::Bool: + case mongo::Date: + case jstOID: + return true; + default: + return false; + } + } + + inline double BSONElement::numberDouble() const { + switch( type() ) { + case NumberDouble: + return _numberDouble(); + case NumberInt: + return *reinterpret_cast< const int* >( value() ); + case NumberLong: + return (double) *reinterpret_cast< const long long* >( value() ); + default: + return 0; + } + } + + /** Retrieve int value for the element safely. Zero returned if not a number. Converted to int if another numeric type. */ + inline int BSONElement::numberInt() const { + switch( type() ) { + case NumberDouble: + return (int) _numberDouble(); + case NumberInt: + return _numberInt(); + case NumberLong: + return (int) _numberLong(); + default: + return 0; + } + } + + /** Retrieve long value for the element safely. Zero returned if not a number. */ + inline long long BSONElement::numberLong() const { + switch( type() ) { + case NumberDouble: + return (long long) _numberDouble(); + case NumberInt: + return _numberInt(); + case NumberLong: + return _numberLong(); + default: + return 0; + } + } + + inline BSONElement::BSONElement() { + static char z = 0; + data = &z; + fieldNameSize_ = 0; + totalSize = 1; + } + +} diff -Nru mongodb-1.4.4/bson/bson.h mongodb-1.6.3/bson/bson.h --- mongodb-1.4.4/bson/bson.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/bson/bson.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,123 @@ +/* NOTE: Standalone bson header for when not using MongoDB. + See also: bsondemo. + + MongoDB includes ../db/jsobj.h instead. This file, however, pulls in much less code / dependencies. +*/ + +/** @file bson.h + BSON classes +*/ + +/* + * Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + BSONObj and its helpers + + "BSON" stands for "binary JSON" -- ie a binary way to represent objects that would be + represented in JSON (plus a few extensions useful for databases & other languages). + + http://www.bsonspec.org/ +*/ + +#pragma once + +#if defined(MONGO_EXPOSE_MACROS) +#error this header is for client programs, not the mongo database itself. include jsobj.h instead. +/* because we define simplistic assert helpers here that don't pull in a bunch of util -- so that + BSON can be used header only. 
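+   A minimal header-only sketch, for illustration (the bo/bob typedefs come from
+   bsonelement.h, included below; assumes the headers are on the include path as
+   bson/bson.h):
+
+       #include "bson/bson.h"
+       using namespace bson;
+       bo obj = bob().append( "x" , 1 ).obj();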
+ */ +#endif + +#include +#include +#include +#include "util/builder.h" + +namespace bson { + + using std::string; + using std::stringstream; + + class assertion : public std::exception { + public: + assertion( unsigned u , const string& s ) + : id( u ) , msg( s ){ + mongo::StringBuilder ss; + ss << "BsonAssertion id: " << u << " " << s; + full = ss.str(); + } + + virtual ~assertion() throw() {} + + virtual const char* what() const throw() { return full.c_str(); } + + unsigned id; + string msg; + string full; + }; +} + +namespace mongo { +#if !defined(assert) + inline void assert(bool expr) { + if(!expr) { + throw bson::assertion( 0 , "assertion failure in bson library" ); + } + } +#endif +#if !defined(uassert) + inline void uasserted(unsigned msgid, std::string s) { + throw bson::assertion( msgid , s ); + } + + inline void uassert(unsigned msgid, std::string msg, bool expr) { + if( !expr ) + uasserted( msgid , msg ); + } + inline void msgasserted(int msgid, const char *msg) { + throw bson::assertion( msgid , msg ); + } + inline void msgasserted(int msgid, const std::string &msg) { msgasserted(msgid, msg.c_str()); } + inline void massert(unsigned msgid, std::string msg, bool expr) { + if(!expr) { + std::cout << "assertion failure in bson library: " << msgid << ' ' << msg << std::endl; + throw bson::assertion( msgid , msg ); + } + } +#endif +} + +#include "../bson/bsontypes.h" +#include "../bson/oid.h" +#include "../bson/bsonelement.h" +#include "../bson/bsonobj.h" +#include "../bson/bsonmisc.h" +#include "../bson/bsonobjbuilder.h" +#include "../bson/bsonobjiterator.h" +#include "../bson/bsoninlines.h" + +namespace mongo { + + inline unsigned getRandomNumber() { +#if defined(_WIN32) + return rand(); +#else + return random(); +#endif + } + +} diff -Nru mongodb-1.4.4/bson/bsoninlines.h mongodb-1.6.3/bson/bsoninlines.h --- mongodb-1.4.4/bson/bsoninlines.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/bson/bsoninlines.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,588 @@ +// bsoninlines.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include "util/atomic_int.h" +#include "util/misc.h" +#include "../util/hex.h" + +namespace mongo { + + inline BSONObjIterator BSONObj::begin() { + return BSONObjIterator(*this); + } + + inline BSONObj BSONElement::embeddedObjectUserCheck() const { + uassert( 10065 , "invalid parameter: expected an object", isABSONObj() ); + return BSONObj(value()); + } + + inline BSONObj BSONElement::embeddedObject() const { + assert( isABSONObj() ); + return BSONObj(value()); + } + + inline BSONObj BSONElement::codeWScopeObject() const { + assert( type() == CodeWScope ); + int strSizeWNull = *(int *)( value() + 4 ); + return BSONObj( value() + 4 + 4 + strSizeWNull ); + } + + inline BSONObj BSONObj::copy() const { + char *p = (char*) malloc(objsize()); + memcpy(p, objdata(), objsize()); + return BSONObj(p, true); + } + + // wrap this element up as a singleton object. 
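+    // e.g., wrapping the element  z : 3  yields the standalone object  { z : 3 }.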
+ inline BSONObj BSONElement::wrap() const { + BSONObjBuilder b(size()+6); + b.append(*this); + return b.obj(); + } + + inline BSONObj BSONElement::wrap( const char * newName ) const { + BSONObjBuilder b(size()+6+(int)strlen(newName)); + b.appendAs(*this,newName); + return b.obj(); + } + + + inline bool BSONObj::hasElement(const char *name) const { + if ( !isEmpty() ) { + BSONObjIterator it(*this); + while ( it.moreWithEOO() ) { + BSONElement e = it.next(); + if ( strcmp(name, e.fieldName()) == 0 ) + return true; + } + } + return false; + } + + inline BSONElement BSONObj::getField(const StringData& name) const { + BSONObjIterator i(*this); + while ( i.more() ) { + BSONElement e = i.next(); + if ( strcmp(e.fieldName(), name.data()) == 0 ) + return e; + } + return BSONElement(); + } + + /* add all the fields from the object specified to this object */ + inline BSONObjBuilder& BSONObjBuilder::appendElements(BSONObj x) { + BSONObjIterator it(x); + while ( it.moreWithEOO() ) { + BSONElement e = it.next(); + if ( e.eoo() ) break; + append(e); + } + return *this; + } + + inline bool BSONObj::isValid(){ + int x = objsize(); + return x > 0 && x <= 1024 * 1024 * 8; + } + + inline bool BSONObj::getObjectID(BSONElement& e) const { + BSONElement f = getField("_id"); + if( !f.eoo() ) { + e = f; + return true; + } + return false; + } + + inline BSONObjBuilderValueStream::BSONObjBuilderValueStream( BSONObjBuilder * builder ) { + _fieldName = 0; + _builder = builder; + } + + template + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( T value ) { + _builder->append(_fieldName, value); + _fieldName = 0; + return *_builder; + } + + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const BSONElement& e ) { + _builder->appendAs( e , _fieldName ); + _fieldName = 0; + return *_builder; + } + + inline Labeler BSONObjBuilderValueStream::operator<<( const Labeler::Label &l ) { + return Labeler( l, this ); + } + + inline void BSONObjBuilderValueStream::endField( const char *nextFieldName ) { + if ( _fieldName && haveSubobj() ) { + _builder->append( _fieldName, subobj()->done() ); + } + _subobj.reset(); + _fieldName = nextFieldName; + } + + inline BSONObjBuilder *BSONObjBuilderValueStream::subobj() { + if ( !haveSubobj() ) + _subobj.reset( new BSONObjBuilder() ); + return _subobj.get(); + } + + template inline + BSONObjBuilder& Labeler::operator<<( T value ) { + s_->subobj()->append( l_.l_, value ); + return *s_->_builder; + } + + inline + BSONObjBuilder& Labeler::operator<<( const BSONElement& e ) { + s_->subobj()->appendAs( e, l_.l_ ); + return *s_->_builder; + } + + // {a: {b:1}} -> {a.b:1} + void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base=""); + inline BSONObj nested2dotted(const BSONObj& obj){ + BSONObjBuilder b; + nested2dotted(b, obj); + return b.obj(); + } + + // {a.b:1} -> {a: {b:1}} + void dotted2nested(BSONObjBuilder& b, const BSONObj& obj); + inline BSONObj dotted2nested(const BSONObj& obj){ + BSONObjBuilder b; + dotted2nested(b, obj); + return b.obj(); + } + + inline BSONObjIterator BSONObjBuilder::iterator() const { + const char * s = _b.buf() + _offset; + const char * e = _b.buf() + _b.len(); + return BSONObjIterator( s , e ); + } + + /* WARNING: nested/dotted conversions are not 100% reversible + * nested2dotted(dotted2nested({a.b: {c:1}})) -> {a.b.c: 1} + * also, dotted2nested ignores order + */ + + typedef map BSONMap; + inline BSONMap bson2map(const BSONObj& obj){ + BSONMap m; + BSONObjIterator it(obj); + while (it.more()){ + BSONElement e = 
it.next(); + m[e.fieldName()] = e; + } + return m; + } + + struct BSONElementFieldNameCmp { + bool operator()( const BSONElement &l, const BSONElement &r ) const { + return strcmp( l.fieldName() , r.fieldName() ) <= 0; + } + }; + + typedef set BSONSortedElements; + inline BSONSortedElements bson2set( const BSONObj& obj ){ + BSONSortedElements s; + BSONObjIterator it(obj); + while ( it.more() ) + s.insert( it.next() ); + return s; + } + + inline string BSONObj::toString( bool isArray, bool full ) const { + if ( isEmpty() ) return "{}"; + StringBuilder s; + toString(s, isArray, full); + return s.str(); + } + inline void BSONObj::toString(StringBuilder& s, bool isArray, bool full ) const { + if ( isEmpty() ){ + s << "{}"; + return; + } + + s << ( isArray ? "[ " : "{ " ); + BSONObjIterator i(*this); + bool first = true; + while ( 1 ) { + massert( 10327 , "Object does not end with EOO", i.moreWithEOO() ); + BSONElement e = i.next( true ); + massert( 10328 , "Invalid element size", e.size() > 0 ); + massert( 10329 , "Element too large", e.size() < ( 1 << 30 ) ); + int offset = (int) (e.rawdata() - this->objdata()); + massert( 10330 , "Element extends past end of object", + e.size() + offset <= this->objsize() ); + e.validate(); + bool end = ( e.size() + offset == this->objsize() ); + if ( e.eoo() ) { + massert( 10331 , "EOO Before end of object", end ); + break; + } + if ( first ) + first = false; + else + s << ", "; + e.toString(s, !isArray, full ); + } + s << ( isArray ? " ]" : " }" ); + } + + extern unsigned getRandomNumber(); + + inline void BSONElement::validate() const { + const BSONType t = type(); + + switch( t ) { + case DBRef: + case Code: + case Symbol: + case mongo::String: { + int x = valuestrsize(); + if ( x > 0 && valuestr()[x-1] == 0 ) + return; + StringBuilder buf; + buf << "Invalid dbref/code/string/symbol size: " << x << " strnlen:" << mongo::strnlen( valuestr() , x ); + msgasserted( 10321 , buf.str() ); + break; + } + case CodeWScope: { + int totalSize = *( int * )( value() ); + massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 ); + int strSizeWNull = *( int * )( value() + 4 ); + massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 ); + massert( 10324 , "Invalid CodeWScope string size", + strSizeWNull > 0 && + (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) ); + massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 ); + int objSize = *( int * )( value() + 4 + 4 + strSizeWNull ); + massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize ); + // Subobject validation handled elsewhere. + } + case Object: + // We expect Object size validation to be handled elsewhere. 
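+            // (note: the CodeWScope case above falls through to this point as well)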
+ default: + break; + } + } + + inline int BSONElement::size( int maxLen ) const { + if ( totalSize >= 0 ) + return totalSize; + + int remain = maxLen - fieldNameSize() - 1; + + int x = 0; + switch ( type() ) { + case EOO: + case Undefined: + case jstNULL: + case MaxKey: + case MinKey: + break; + case mongo::Bool: + x = 1; + break; + case NumberInt: + x = 4; + break; + case Timestamp: + case mongo::Date: + case NumberDouble: + case NumberLong: + x = 8; + break; + case jstOID: + x = 12; + break; + case Symbol: + case Code: + case mongo::String: + massert( 10313 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4; + break; + case CodeWScope: + massert( 10314 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = objsize(); + break; + + case DBRef: + massert( 10315 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4 + 12; + break; + case Object: + case mongo::Array: + massert( 10316 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = objsize(); + break; + case BinData: + massert( 10317 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4 + 1/*subtype*/; + break; + case RegEx: + { + const char *p = value(); + size_t len1 = ( maxLen == -1 ) ? strlen( p ) : mongo::strnlen( p, remain ); + //massert( 10318 , "Invalid regex string", len1 != -1 ); // ERH - 4/28/10 - don't think this does anything + p = p + len1 + 1; + size_t len2 = ( maxLen == -1 ) ? strlen( p ) : mongo::strnlen( p, remain - len1 - 1 ); + //massert( 10319 , "Invalid regex options string", len2 != -1 ); // ERH - 4/28/10 - don't think this does anything + x = (int) (len1 + 1 + len2 + 1); + } + break; + default: { + StringBuilder ss; + ss << "BSONElement: bad type " << (int) type(); + string msg = ss.str(); + massert( 10320 , msg.c_str(),false); + } + } + totalSize = x + fieldNameSize() + 1; // BSONType + + return totalSize; + } + + inline string BSONElement::toString( bool includeFieldName, bool full ) const { + StringBuilder s; + toString(s, includeFieldName, full); + return s.str(); + } + inline void BSONElement::toString(StringBuilder& s, bool includeFieldName, bool full ) const { + if ( includeFieldName && type() != EOO ) + s << fieldName() << ": "; + switch ( type() ) { + case EOO: + s << "EOO"; + break; + case mongo::Date: + s << "new Date(" << date() << ')'; + break; + case RegEx: + { + s << "/" << regex() << '/'; + const char *p = regexFlags(); + if ( p ) s << p; + } + break; + case NumberDouble: + s.appendDoubleNice( number() ); + break; + case NumberLong: + s << _numberLong(); + break; + case NumberInt: + s << _numberInt(); + break; + case mongo::Bool: + s << ( boolean() ? 
"true" : "false" ); + break; + case Object: + embeddedObject().toString(s, false, full); + break; + case mongo::Array: + embeddedObject().toString(s, true, full); + break; + case Undefined: + s << "undefined"; + break; + case jstNULL: + s << "null"; + break; + case MaxKey: + s << "MaxKey"; + break; + case MinKey: + s << "MinKey"; + break; + case CodeWScope: + s << "CodeWScope( " + << codeWScopeCode() << ", " << codeWScopeObject().toString(false, full) << ")"; + break; + case Code: + if ( !full && valuestrsize() > 80 ) { + s.write(valuestr(), 70); + s << "..."; + } else { + s.write(valuestr(), valuestrsize()-1); + } + break; + case Symbol: + case mongo::String: + s << '"'; + if ( !full && valuestrsize() > 80 ) { + s.write(valuestr(), 70); + s << "...\""; + } else { + s.write(valuestr(), valuestrsize()-1); + s << '"'; + } + break; + case DBRef: + s << "DBRef('" << valuestr() << "',"; + { + mongo::OID *x = (mongo::OID *) (valuestr() + valuestrsize()); + s << *x << ')'; + } + break; + case jstOID: + s << "ObjectId('"; + s << __oid() << "')"; + break; + case BinData: + s << "BinData"; + if (full){ + int len; + const char* data = binDataClean(len); + s << '(' << binDataType() << ", " << toHex(data, len) << ')'; + } + break; + case Timestamp: + s << "Timestamp " << timestampTime() << "|" << timestampInc(); + break; + default: + s << "?type=" << type(); + break; + } + } + + /* return has eoo() true if no match + supports "." notation to reach into embedded objects + */ + inline BSONElement BSONObj::getFieldDotted(const char *name) const { + BSONElement e = getField( name ); + if ( e.eoo() ) { + const char *p = strchr(name, '.'); + if ( p ) { + string left(name, p-name); + BSONObj sub = getObjectField(left.c_str()); + return sub.isEmpty() ? BSONElement() : sub.getFieldDotted(p+1); + } + } + + return e; + } + + inline BSONObj BSONObj::getObjectField(const char *name) const { + BSONElement e = getField(name); + BSONType t = e.type(); + return t == Object || t == Array ? e.embeddedObject() : BSONObj(); + } + + inline int BSONObj::nFields() const { + int n = 0; + BSONObjIterator i(*this); + while ( i.moreWithEOO() ) { + BSONElement e = i.next(); + if ( e.eoo() ) + break; + n++; + } + return n; + } + + inline BSONObj::BSONObj() { + /* LITTLE ENDIAN */ + static char p[] = { 5, 0, 0, 0, 0 }; + _objdata = p; + } + + inline BSONObj BSONElement::Obj() const { return embeddedObjectUserCheck(); } + + inline BSONElement BSONElement::operator[] (const string& field) const { + BSONObj o = Obj(); + return o[field]; + } + + inline void BSONObj::elems(vector &v) const { + BSONObjIterator i(*this); + while( i.more() ) + v.push_back(i.next()); + } + + inline void BSONObj::elems(list &v) const { + BSONObjIterator i(*this); + while( i.more() ) + v.push_back(i.next()); + } + + template + void BSONObj::Vals(vector& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + T t; + i.next().Val(t); + v.push_back(t); + } + } + template + void BSONObj::Vals(list& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + T t; + i.next().Val(t); + v.push_back(t); + } + } + + template + void BSONObj::vals(vector& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + try { + T t; + i.next().Val(t); + v.push_back(t); + } catch(...) { } + } + } + template + void BSONObj::vals(list& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + try { + T t; + i.next().Val(t); + v.push_back(t); + } catch(...) 
{ } + } + } + + inline ostream& operator<<( ostream &s, const BSONObj &o ) { + return s << o.toString(); + } + + inline ostream& operator<<( ostream &s, const BSONElement &e ) { + return s << e.toString(); + } + + inline void BSONElement::Val(BSONObj& v) const { v = Obj(); } + + template + inline BSONFieldValue BSONField::query( const char * q , const T& t ) const { + BSONObjBuilder b; + b.append( q , t ); + return BSONFieldValue( _name , b.obj() ); + } +} diff -Nru mongodb-1.4.4/bson/bsonmisc.h mongodb-1.6.3/bson/bsonmisc.h --- mongodb-1.4.4/bson/bsonmisc.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/bson/bsonmisc.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,195 @@ +// @file bsonmisc.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace mongo { + + int getGtLtOp(const BSONElement& e); + + struct BSONElementCmpWithoutField { + bool operator()( const BSONElement &l, const BSONElement &r ) const { + return l.woCompare( r, false ) < 0; + } + }; + + class BSONObjCmp { + public: + BSONObjCmp( const BSONObj &_order = BSONObj() ) : order( _order ) {} + bool operator()( const BSONObj &l, const BSONObj &r ) const { + return l.woCompare( r, order ) < 0; + } + private: + BSONObj order; + }; + + class BSONObjCmpDefaultOrder : public BSONObjCmp { + public: + BSONObjCmpDefaultOrder() : BSONObjCmp( BSONObj() ) {} + }; + + typedef set< BSONObj, BSONObjCmpDefaultOrder > BSONObjSetDefaultOrder; + + enum FieldCompareResult { + LEFT_SUBFIELD = -2, + LEFT_BEFORE = -1, + SAME = 0, + RIGHT_BEFORE = 1 , + RIGHT_SUBFIELD = 2 + }; + + FieldCompareResult compareDottedFieldNames( const string& l , const string& r ); + +/** Use BSON macro to build a BSONObj from a stream + + e.g., + BSON( "name" << "joe" << "age" << 33 ) + + with auto-generated object id: + BSON( GENOID << "name" << "joe" << "age" << 33 ) + + The labels GT, GTE, LT, LTE, NE can be helpful for stream-oriented construction + of a BSONObj, particularly when assembling a Query. For example, + BSON( "a" << GT << 23.4 << NE << 30 << "b" << 2 ) produces the object + { a: { \$gt: 23.4, \$ne: 30 }, b: 2 }. +*/ +#define BSON(x) (( mongo::BSONObjBuilder(64) << x ).obj()) + +/** Use BSON_ARRAY macro like BSON macro, but without keys + + BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); + + */ +#define BSON_ARRAY(x) (( mongo::BSONArrayBuilder() << x ).arr()) + + /* Utility class to auto assign object IDs. + Example: + cout << BSON( GENOID << "z" << 3 ); // { _id : ..., z : 3 } + */ + extern struct GENOIDLabeler { } GENOID; + + /* Utility class to add a Date element with the current time + Example: + cout << BSON( "created" << DATENOW ); // { created : "2009-10-09 11:41:42" } + */ + extern struct DateNowLabeler { } DATENOW; + + // Utility class to implement GT, GTE, etc as described above. 
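+    //   e.g.,  BSON( "age" << GT << 21 )  produces  { age : { $gt : 21 } }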
+ class Labeler { + public: + struct Label { + Label( const char *l ) : l_( l ) {} + const char *l_; + }; + Labeler( const Label &l, BSONObjBuilderValueStream *s ) : l_( l ), s_( s ) {} + template + BSONObjBuilder& operator<<( T value ); + + /* the value of the element e is appended i.e. for + "age" << GT << someElement + one gets + { age : { $gt : someElement's value } } + */ + BSONObjBuilder& operator<<( const BSONElement& e ); + private: + const Label &l_; + BSONObjBuilderValueStream *s_; + }; + + extern Labeler::Label GT; + extern Labeler::Label GTE; + extern Labeler::Label LT; + extern Labeler::Label LTE; + extern Labeler::Label NE; + extern Labeler::Label SIZE; + + + // $or helper: OR(BSON("x" << GT << 7), BSON("y" << LT << 6)); + // becomes : {$or: [{x: {$gt: 7}}, {y: {$lt: 6}}]} + inline BSONObj OR(const BSONObj& a, const BSONObj& b); + inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c); + inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d); + inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e); + inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e, const BSONObj& f); + // definitions in bsonobjbuilder.h b/c of incomplete types + + // Utility class to implement BSON( key << val ) as described above. + class BSONObjBuilderValueStream : public boost::noncopyable { + public: + friend class Labeler; + BSONObjBuilderValueStream( BSONObjBuilder * builder ); + + BSONObjBuilder& operator<<( const BSONElement& e ); + + template + BSONObjBuilder& operator<<( T value ); + + BSONObjBuilder& operator<<(DateNowLabeler& id); + + Labeler operator<<( const Labeler::Label &l ); + + void endField( const char *nextFieldName = 0 ); + bool subobjStarted() const { return _fieldName != 0; } + + private: + const char * _fieldName; + BSONObjBuilder * _builder; + + bool haveSubobj() const { return _subobj.get() != 0; } + BSONObjBuilder *subobj(); + auto_ptr< BSONObjBuilder > _subobj; + }; + + /** + used in conjuction with BSONObjBuilder, allows for proper buffer size to prevent crazy memory usage + */ + class BSONSizeTracker { + public: + BSONSizeTracker(){ + _pos = 0; + for ( int i=0; i= SIZE ) + _pos = 0; + } + + /** + * right now choosing largest size + */ + int getSize() const { + int x = 16; // sane min + for ( int i=0; i x ) + x = _sizes[i]; + } + return x; + } + + private: + enum { SIZE = 10 }; + int _pos; + int _sizes[SIZE]; + }; + +} diff -Nru mongodb-1.4.4/bson/bsonobjbuilder.h mongodb-1.6.3/bson/bsonobjbuilder.h --- mongodb-1.4.4/bson/bsonobjbuilder.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/bson/bsonobjbuilder.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,749 @@ +/* bsonobjbuilder.h + + Classes in this file: + BSONObjBuilder + BSONArrayBuilder +*/ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +using namespace std; + +namespace mongo { + +#if defined(_WIN32) +// warning: 'this' : used in base member initializer list +#pragma warning( disable : 4355 ) +#endif + + template + class BSONFieldValue { + public: + BSONFieldValue( const string& name , const T& t ){ + _name = name; + _t = t; + } + + const T& value() const { return _t; } + const string& name() const { return _name; } + + private: + string _name; + T _t; + }; + + template + class BSONField { + public: + BSONField( const string& name , const string& longName="" ) + : _name(name), _longName(longName){} + const string& name() const { return _name; } + operator string() const { return _name; } + + BSONFieldValue make( const T& t ) const { + return BSONFieldValue( _name , t ); + } + + BSONFieldValue gt( const T& t ) const { return query( "$gt" , t ); } + BSONFieldValue lt( const T& t ) const { return query( "$lt" , t ); } + + BSONFieldValue query( const char * q , const T& t ) const; + + BSONFieldValue operator()( const T& t ) const { + return BSONFieldValue( _name , t ); + } + + private: + string _name; + string _longName; + }; + + /** Utility for creating a BSONObj. + See also the BSON() and BSON_ARRAY() macros. + */ + class BSONObjBuilder : boost::noncopyable { + public: + /** @param initsize this is just a hint as to the final size of the object */ + BSONObjBuilder(int initsize=512) : _b(_buf), _buf(initsize), _offset( 0 ), _s( this ) , _tracker(0) , _doneCalled(false) { + _b.skip(4); /*leave room for size field*/ + } + + /** @param baseBuilder construct a BSONObjBuilder using an existing BufBuilder */ + BSONObjBuilder( BufBuilder &baseBuilder ) : _b( baseBuilder ), _buf( 0 ), _offset( baseBuilder.len() ), _s( this ) , _tracker(0) , _doneCalled(false) { + _b.skip( 4 ); + } + + BSONObjBuilder( const BSONSizeTracker & tracker ) : _b(_buf) , _buf(tracker.getSize() ), _offset(0), _s( this ) , _tracker( (BSONSizeTracker*)(&tracker) ) , _doneCalled(false) { + _b.skip( 4 ); + } + + ~BSONObjBuilder(){ + if ( !_doneCalled && _b.buf() && _buf.getSize() == 0 ){ + _done(); + } + } + + /** add all the fields from the object specified to this object */ + BSONObjBuilder& appendElements(BSONObj x); + + /** append element to the object we are building */ + BSONObjBuilder& append( const BSONElement& e) { + assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. + _b.appendBuf((void*) e.rawdata(), e.size()); + return *this; + } + + /** append an element but with a new name */ + BSONObjBuilder& appendAs(const BSONElement& e, const StringData& fieldName) { + assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. 
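+            // element layout: one type byte, the (new) field name, then the raw value bytes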
+ _b.appendNum((char) e.type()); + _b.appendStr(fieldName); + _b.appendBuf((void *) e.value(), e.valuesize()); + return *this; + } + + /** add a subobject as a member */ + BSONObjBuilder& append(const StringData& fieldName, BSONObj subObj) { + _b.appendNum((char) Object); + _b.appendStr(fieldName); + _b.appendBuf((void *) subObj.objdata(), subObj.objsize()); + return *this; + } + + /** add a subobject as a member */ + BSONObjBuilder& appendObject(const StringData& fieldName, const char * objdata , int size = 0 ){ + assert( objdata ); + if ( size == 0 ){ + size = *((int*)objdata); + } + + assert( size > 4 && size < 100000000 ); + + _b.appendNum((char) Object); + _b.appendStr(fieldName); + _b.appendBuf((void*)objdata, size ); + return *this; + } + + + /** add header for a new subobject and return bufbuilder for writing to + the subobject's body */ + BufBuilder &subobjStart(const StringData& fieldName) { + _b.appendNum((char) Object); + _b.appendStr(fieldName); + return _b; + } + + /** add a subobject as a member with type Array. Thus arr object should have "0", "1", ... + style fields in it. + */ + BSONObjBuilder& appendArray(const StringData& fieldName, const BSONObj &subObj) { + _b.appendNum((char) Array); + _b.appendStr(fieldName); + _b.appendBuf((void *) subObj.objdata(), subObj.objsize()); + return *this; + } + BSONObjBuilder& append(const StringData& fieldName, BSONArray arr) { + return appendArray(fieldName, arr); + } + + /** add header for a new subarray and return bufbuilder for writing to + the subarray's body */ + BufBuilder &subarrayStart(const StringData& fieldName) { + _b.appendNum((char) Array); + _b.appendStr(fieldName); + return _b; + } + + /** Append a boolean element */ + BSONObjBuilder& appendBool(const StringData& fieldName, int val) { + _b.appendNum((char) Bool); + _b.appendStr(fieldName); + _b.appendNum((char) (val?1:0)); + return *this; + } + + /** Append a boolean element */ + BSONObjBuilder& append(const StringData& fieldName, bool val) { + _b.appendNum((char) Bool); + _b.appendStr(fieldName); + _b.appendNum((char) (val?1:0)); + return *this; + } + + /** Append a 32 bit integer element */ + BSONObjBuilder& append(const StringData& fieldName, int n) { + _b.appendNum((char) NumberInt); + _b.appendStr(fieldName); + _b.appendNum(n); + return *this; + } + + /** Append a 32 bit unsigned element - cast to a signed int. */ + BSONObjBuilder& append(const StringData& fieldName, unsigned n) { + return append(fieldName, (int) n); + } + + /** Append a NumberLong */ + BSONObjBuilder& append(const StringData& fieldName, long long n) { + _b.appendNum((char) NumberLong); + _b.appendStr(fieldName); + _b.appendNum(n); + return *this; + } + + /** appends a number. 
if n < max(int)/2 then uses int, otherwise long long */ + BSONObjBuilder& appendIntOrLL( const StringData& fieldName , long long n ){ + long long x = n; + if ( x < 0 ) + x = x * -1; + if ( x < ( numeric_limits::max() / 2 ) ) + append( fieldName , (int)n ); + else + append( fieldName , n ); + return *this; + } + + /** + * appendNumber is a series of method for appending the smallest sensible type + * mostly for JS + */ + BSONObjBuilder& appendNumber( const StringData& fieldName , int n ){ + return append( fieldName , n ); + } + + BSONObjBuilder& appendNumber( const StringData& fieldName , double d ){ + return append( fieldName , d ); + } + + BSONObjBuilder& appendNumber( const StringData& fieldName , long long l ){ + static long long maxInt = (int)pow( 2.0 , 30.0 ); + static long long maxDouble = (long long)pow( 2.0 , 40.0 ); + + if ( l < maxInt ) + append( fieldName , (int)l ); + else if ( l < maxDouble ) + append( fieldName , (double)l ); + else + append( fieldName , l ); + return *this; + } + + /** Append a double element */ + BSONObjBuilder& append(const StringData& fieldName, double n) { + _b.appendNum((char) NumberDouble); + _b.appendStr(fieldName); + _b.appendNum(n); + return *this; + } + + /** tries to append the data as a number + * @return true if the data was able to be converted to a number + */ + bool appendAsNumber( const StringData& fieldName , const string& data ); + + /** Append a BSON Object ID (OID type). + @deprecated Generally, it is preferred to use the append append(name, oid) + method for this. + */ + BSONObjBuilder& appendOID(const StringData& fieldName, OID *oid = 0 , bool generateIfBlank = false ) { + _b.appendNum((char) jstOID); + _b.appendStr(fieldName); + if ( oid ) + _b.appendBuf( (void *) oid, 12 ); + else { + OID tmp; + if ( generateIfBlank ) + tmp.init(); + else + tmp.clear(); + _b.appendBuf( (void *) &tmp, 12 ); + } + return *this; + } + + /** + Append a BSON Object ID. + @param fieldName Field name, e.g., "_id". + @returns the builder object + */ + BSONObjBuilder& append( const StringData& fieldName, OID oid ) { + _b.appendNum((char) jstOID); + _b.appendStr(fieldName); + _b.appendBuf( (void *) &oid, 12 ); + return *this; + } + + /** + Generate and assign an object id for the _id field. + _id should be the first element in the object for good performance. + */ + BSONObjBuilder& genOID() { + return append("_id", OID::gen()); + } + + /** Append a time_t date. + @param dt a C-style 32 bit date value, that is + the number of seconds since January 1, 1970, 00:00:00 GMT + */ + BSONObjBuilder& appendTimeT(const StringData& fieldName, time_t dt) { + _b.appendNum((char) Date); + _b.appendStr(fieldName); + _b.appendNum(static_cast(dt) * 1000); + return *this; + } + /** Append a date. + @param dt a Java-style 64 bit date value, that is + the number of milliseconds since January 1, 1970, 00:00:00 GMT + */ + BSONObjBuilder& appendDate(const StringData& fieldName, Date_t dt) { + /* easy to pass a time_t to this and get a bad result. thus this warning. 
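+           For a time_t value, prefer appendTimeT() above, which performs the
+           seconds-to-milliseconds conversion; e.g.:
+
+               b.appendTimeT( "when" , time(0) );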
+        */
+#if defined(_DEBUG) && defined(MONGO_EXPOSE_MACROS)
+        if( dt > 0 && dt <= 0xffffffff ) {
+            static int n;
+            if( n++ == 0 )
+                log() << "DEV WARNING appendDate() called with a tiny (but nonzero) date" << endl;
+        }
+#endif
+        _b.appendNum((char) Date);
+        _b.appendStr(fieldName);
+        _b.appendNum(dt);
+        return *this;
+    }
+    BSONObjBuilder& append(const StringData& fieldName, Date_t dt) {
+        return appendDate(fieldName, dt);
+    }
+
+    /** Append a regular expression value
+        @param regex the regular expression pattern
+        @param options options such as "i" or "g"
+    */
+    BSONObjBuilder& appendRegex(const StringData& fieldName, const char *regex, const char *options = "") {
+        _b.appendNum((char) RegEx);
+        _b.appendStr(fieldName);
+        _b.appendStr(regex);
+        _b.appendStr(options);
+        return *this;
+    }
+    /** Append a regular expression value
+        @param regex the regular expression pattern
+        @param options options such as "i" or "g"
+    */
+    BSONObjBuilder& appendRegex(const StringData& fieldName, string regex, string options = "") {
+        return appendRegex(fieldName, regex.c_str(), options.c_str());
+    }
+    BSONObjBuilder& appendCode(const StringData& fieldName, const char *code) {
+        _b.appendNum((char) Code);
+        _b.appendStr(fieldName);
+        _b.appendNum((int) strlen(code)+1);
+        _b.appendStr(code);
+        return *this;
+    }
+    /** Append a string element. len DOES include terminating nul */
+    BSONObjBuilder& append(const StringData& fieldName, const char *str, int len) {
+        _b.appendNum((char) String);
+        _b.appendStr(fieldName);
+        _b.appendNum((int)len);
+        _b.appendBuf(str, len);
+        return *this;
+    }
+    /** Append a string element */
+    BSONObjBuilder& append(const StringData& fieldName, const char *str) {
+        return append(fieldName, str, (int) strlen(str)+1);
+    }
+    /** Append a string element */
+    BSONObjBuilder& append(const StringData& fieldName, string str) {
+        return append(fieldName, str.c_str(), (int) str.size()+1);
+    }
+    BSONObjBuilder& appendSymbol(const StringData& fieldName, const char *symbol) {
+        _b.appendNum((char) Symbol);
+        _b.appendStr(fieldName);
+        _b.appendNum((int) strlen(symbol)+1);
+        _b.appendStr(symbol);
+        return *this; }
+
+    /** Append a Null element to the object */
+    BSONObjBuilder& appendNull( const StringData& fieldName ) {
+        _b.appendNum( (char) jstNULL );
+        _b.appendStr( fieldName );
+        return *this; }
+
+    // Append an element that is less than all other keys.
+    BSONObjBuilder& appendMinKey( const StringData& fieldName ) {
+        _b.appendNum( (char) MinKey );
+        _b.appendStr( fieldName );
+        return *this;
+    }
+    // Append an element that is greater than all other keys.
+    BSONObjBuilder& appendMaxKey( const StringData& fieldName ) {
+        _b.appendNum( (char) MaxKey );
+        _b.appendStr( fieldName );
+        return *this;
+    }
+
+    // Append a Timestamp field -- will be updated to next OpTime on db insert.
+    BSONObjBuilder& appendTimestamp( const StringData& fieldName ) {
+        _b.appendNum( (char) Timestamp );
+        _b.appendStr( fieldName );
+        _b.appendNum( (unsigned long long) 0 );
+        return *this;
+    }
+
+    BSONObjBuilder& appendTimestamp( const StringData& fieldName , unsigned long long val ) {
+        _b.appendNum( (char) Timestamp );
+        _b.appendStr( fieldName );
+        _b.appendNum( val );
+        return *this;
+    }
+
+    /**
+        Timestamps are a special BSON datatype that is used internally for replication.
+        Append a timestamp element to the object being built.
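[Editor's example -- an illustrative sketch, not part of the patch; the value shown
is arbitrary:]

    BSONObjBuilder b;
    // 'time' is in millis (stored as seconds); 'inc' orders ops within one second
    b.appendTimestamp("optime", 1285300000000ULL, 1);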
+ @param time - in millis (but stored in seconds) + */ + BSONObjBuilder& appendTimestamp( const StringData& fieldName , unsigned long long time , unsigned int inc ); + + /* + Append an element of the deprecated DBRef type. + @deprecated + */ + BSONObjBuilder& appendDBRef( const StringData& fieldName, const char *ns, const OID &oid ) { + _b.appendNum( (char) DBRef ); + _b.appendStr( fieldName ); + _b.appendNum( (int) strlen( ns ) + 1 ); + _b.appendStr( ns ); + _b.appendBuf( (void *) &oid, 12 ); + return *this; + } + + /** Append a binary data element + @param fieldName name of the field + @param len length of the binary data in bytes + @param subtype subtype information for the data. @see enum BinDataType in bsontypes.h. + Use BinDataGeneral if you don't care about the type. + @param data the byte array + */ + BSONObjBuilder& appendBinData( const StringData& fieldName, int len, BinDataType type, const char *data ) { + _b.appendNum( (char) BinData ); + _b.appendStr( fieldName ); + _b.appendNum( len ); + _b.appendNum( (char) type ); + _b.appendBuf( (void *) data, len ); + return *this; + } + BSONObjBuilder& appendBinData( const StringData& fieldName, int len, BinDataType type, const unsigned char *data ) { + return appendBinData(fieldName, len, type, (const char *) data); + } + + /** + Subtype 2 is deprecated. + Append a BSON bindata bytearray element. + @param data a byte array + @param len the length of data + */ + BSONObjBuilder& appendBinDataArrayDeprecated( const char * fieldName , const char * data , int len ){ + _b.appendNum( (char) BinData ); + _b.appendStr( fieldName ); + _b.appendNum( len + 4 ); + _b.appendNum( (char)0x2 ); + _b.appendNum( len ); + _b.appendBuf( (void *) data, len ); + return *this; + } + + /** Append to the BSON object a field of type CodeWScope. This is a javascript code + fragment accompanied by some scope that goes with it. + */ + BSONObjBuilder& appendCodeWScope( const StringData& fieldName, const char *code, const BSONObj &scope ) { + _b.appendNum( (char) CodeWScope ); + _b.appendStr( fieldName ); + _b.appendNum( ( int )( 4 + 4 + strlen( code ) + 1 + scope.objsize() ) ); + _b.appendNum( ( int ) strlen( code ) + 1 ); + _b.appendStr( code ); + _b.appendBuf( ( void * )scope.objdata(), scope.objsize() ); + return *this; + } + + void appendUndefined( const StringData& fieldName ) { + _b.appendNum( (char) Undefined ); + _b.appendStr( fieldName ); + } + + /* helper function -- see Query::where() for primary way to do this. */ + void appendWhere( const char *code, const BSONObj &scope ){ + appendCodeWScope( "$where" , code , scope ); + } + void appendWhere( const string &code, const BSONObj &scope ){ + appendWhere( code.c_str(), scope ); + } + + /** + these are the min/max when comparing, not strict min/max elements for a given type + */ + void appendMinForType( const StringData& fieldName , int type ); + void appendMaxForType( const StringData& fieldName , int type ); + + /** Append an array of values. */ + template < class T > + BSONObjBuilder& append( const StringData& fieldName, const vector< T >& vals ); + + template < class T > + BSONObjBuilder& append( const StringData& fieldName, const list< T >& vals ); + + /** The returned BSONObj will free the buffer when it is finished. */ + BSONObj obj() { + bool own = owned(); + massert( 10335 , "builder does not own memory", own ); + int l; + return BSONObj(decouple(l), true); + } + + /** Fetch the object we have built. 
+ BSONObjBuilder still frees the object when the builder goes out of + scope -- very important to keep in mind. Use obj() if you + would like the BSONObj to last longer than the builder. + */ + BSONObj done() { + return BSONObj(_done()); + } + + // Like 'done' above, but does not construct a BSONObj to return to the caller. + void doneFast() { + (void)_done(); + } + + /** Peek at what is in the builder, but leave the builder ready for more appends. + The returned object is only valid until the next modification or destruction of the builder. + Intended use case: append a field if not already there. + */ + BSONObj asTempObj() { + BSONObj temp(_done()); + _b.setlen(_b.len()-1); //next append should overwrite the EOO + _doneCalled = false; + return temp; + } + + /* assume ownership of the buffer - you must then free it (with free()) */ + char* decouple(int& l) { + char *x = _done(); + assert( x ); + l = _b.len(); + _b.decouple(); + return x; + } + void decouple() { + _b.decouple(); // post done() call version. be sure jsobj frees... + } + + void appendKeys( const BSONObj& keyPattern , const BSONObj& values ); + + static string numStr( int i ) { + if (i>=0 && i<100) + return numStrs[i]; + StringBuilder o; + o << i; + return o.str(); + } + + /** Stream oriented way to add field names and values. */ + BSONObjBuilderValueStream &operator<<(const char * name ) { + _s.endField( name ); + return _s; + } + + /** Stream oriented way to add field names and values. */ + BSONObjBuilder& operator<<( GENOIDLabeler ) { return genOID(); } + + // prevent implicit string conversions which would allow bad things like BSON( BSON( "foo" << 1 ) << 2 ) + struct ForceExplicitString { + ForceExplicitString( const string &str ) : str_( str ) {} + string str_; + }; + + /** Stream oriented way to add field names and values. 
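[Editor's example -- an illustrative sketch of the stream style, not part of the
patch:]

    BSONObjBuilder b;
    b << "name" << "joe" << "age" << 33;
    BSONObj o = b.obj();   // { name: "joe", age: 33 }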
+    */
+    BSONObjBuilderValueStream &operator<<( const ForceExplicitString& name ) {
+        return operator<<( name.str_.c_str() );
+    }
+
+    Labeler operator<<( const Labeler::Label &l ) {
+        massert( 10336 , "No subobject started", _s.subobjStarted() );
+        return _s << l;
+    }
+
+    template<typename T>
+    BSONObjBuilderValueStream& operator<<( const BSONField<T>& f ) {
+        _s.endField( f.name().c_str() );
+        return _s;
+    }
+
+    template<typename T>
+    BSONObjBuilder& operator<<( const BSONFieldValue<T>& v ) {
+        append( v.name().c_str() , v.value() );
+        return *this;
+    }
+
+
+    /** @return true if we are using our own bufbuilder, and not an alternate that was given to us in our constructor */
+    bool owned() const { return &_b == &_buf; }
+
+    BSONObjIterator iterator() const ;
+
+private:
+    char* _done() {
+        if ( _doneCalled )
+            return _b.buf() + _offset;
+
+        _doneCalled = true;
+        _s.endField();
+        _b.appendNum((char) EOO);
+        char *data = _b.buf() + _offset;
+        int size = _b.len() - _offset;
+        *((int*)data) = size;
+        if ( _tracker )
+            _tracker->got( size );
+        return data;
+    }
+
+    BufBuilder &_b;
+    BufBuilder _buf;
+    int _offset;
+    BSONObjBuilderValueStream _s;
+    BSONSizeTracker * _tracker;
+    bool _doneCalled;
+
+    static const string numStrs[100]; // cache of 0 to 99 inclusive
+};
+
+class BSONArrayBuilder : boost::noncopyable {
+public:
+    BSONArrayBuilder() : _i(0), _b() {}
+    BSONArrayBuilder( BufBuilder &_b ) : _i(0), _b(_b) {}
+
+    template<typename T>
+    BSONArrayBuilder& append(const T& x){
+        _b.append(num().c_str(), x);
+        return *this;
+    }
+
+    BSONArrayBuilder& append(const BSONElement& e){
+        _b.appendAs(e, num());
+        return *this;
+    }
+
+    template<typename T>
+    BSONArrayBuilder& operator<<(const T& x){
+        return append(x);
+    }
+
+    void appendNull() {
+        _b.appendNull(num().c_str());
+    }
+
+    BSONArray arr(){ return BSONArray(_b.obj()); }
+
+    BSONObj done() { return _b.done(); }
+
+    void doneFast() { _b.doneFast(); }
+
+    template<typename T>
+    BSONArrayBuilder& append(const StringData& name, const T& x){
+        fill( name );
+        append( x );
+        return *this;
+    }
+
+    BufBuilder &subobjStart( const char *name = "0" ) {
+        fill( name );
+        return _b.subobjStart( num().c_str() );
+    }
+
+    BufBuilder &subarrayStart( const char *name ) {
+        fill( name );
+        return _b.subarrayStart( num().c_str() );
+    }
+
+    void appendArray( const StringData& name, BSONObj subObj ) {
+        fill( name );
+        _b.appendArray( num().c_str(), subObj );
+    }
+
+    void appendAs( const BSONElement &e, const char *name ) {
+        fill( name );
+        append( e );
+    }
+
+private:
+    void fill( const StringData& name ) {
+        char *r;
+        int n = strtol( name.data(), &r, 10 );
+        if ( *r )
+            uasserted( 13048, (string)"can't append to array using string field name [" + name.data() + "]" );
+        while( _i < n )
+            append( nullElt() );
+    }
+
+    static BSONElement nullElt() {
+        static BSONObj n = nullObj();
+        return n.firstElement();
+    }
+
+    static BSONObj nullObj() {
+        BSONObjBuilder _b;
+        _b.appendNull( "" );
+        return _b.obj();
+    }
+
+    string num(){ return _b.numStr(_i++); }
+    int _i;
+    BSONObjBuilder _b;
+};
+
+template < class T >
+inline BSONObjBuilder& BSONObjBuilder::append( const StringData& fieldName, const vector< T >& vals ) {
+    BSONObjBuilder arrBuilder;
+    for ( unsigned int i = 0; i < vals.size(); ++i )
+        arrBuilder.append( numStr( i ), vals[ i ] );
+    appendArray( fieldName, arrBuilder.done() );
+    return *this;
+}
+
+template < class T >
+inline BSONObjBuilder& BSONObjBuilder::append( const StringData& fieldName, const list< T >& vals ) {
+    BSONObjBuilder arrBuilder;
+    int n = 0;
+    for( typename list< T >::const_iterator i = vals.begin(); i != vals.end(); i++ )
+        arrBuilder.append( numStr(n++), *i );
+    appendArray( fieldName, arrBuilder.done() );
+    return *this;
+}
+
+// $or helper: OR(BSON("x" << GT << 7), BSON("y" << LT << 6));
+inline BSONObj OR(const BSONObj& a, const BSONObj& b)
+{ return BSON( "$or" << BSON_ARRAY(a << b) ); }
+inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c)
+{ return BSON( "$or" << BSON_ARRAY(a << b << c) ); }
+inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d)
+{ return BSON( "$or" << BSON_ARRAY(a << b << c << d) ); }
+inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e)
+{ return BSON( "$or" << BSON_ARRAY(a << b << c << d << e) ); }
+inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e, const BSONObj& f)
+{ return BSON( "$or" << BSON_ARRAY(a << b << c << d << e << f) ); }
+
+}
diff -Nru mongodb-1.4.4/bson/bsonobj.h mongodb-1.6.3/bson/bsonobj.h
--- mongodb-1.4.4/bson/bsonobj.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsonobj.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,394 @@
+// @file bsonobj.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <set>
+#include <list>
+#include <vector>
+#include "util/builder.h"
+#include "stringdata.h"
+
+namespace mongo {
+
+    typedef set< BSONElement, BSONElementCmpWithoutField > BSONElementSet;
+
+    /**
+       C++ representation of a "BSON" object -- that is, an extended JSON-style
+       object in a binary representation.
+
+       See bsonspec.org.
+
+       Note that BSONObj's have a smart pointer capability built in -- so you can
+       pass them around by value. The reference counts used to implement this
+       do not use locking, so copying and destroying BSONObj's are not thread-safe
+       operations.
+
+       BSON object format:
+
+       \code
+       <unsigned totalSize> {<byte BSONType><cstring FieldName><Data>}* EOO
+
+       totalSize includes itself.
+
+       Data:
+       Bool:      <byte>
+       EOO:       nothing follows
+       Undefined: nothing follows
+       OID:       an OID object
+       NumberDouble: <double>
+       NumberInt: <int32>
+       String:    <unsigned32 strsizewithnull><cstring>
+       Date:      <8bytes>
+       Regex:     <cstring regex><cstring options>
+       Object:    a nested object, leading with its entire size, which terminates with EOO.
+       Array:     same as object
+       DBRef:     <strlen> <cstring ns> <oid>
+       DBRef:     a database reference: basically a collection name plus an Object ID
+       BinData:   <int len> <byte subtype> <byte[len] data>
+       Code:      a function (not a closure): same format as String.
+       Symbol:    a language symbol (say a python symbol).  same format as String.
+       Code With Scope: <total size><String><Object>
+       \endcode
+     */
+    class BSONObj {
+    public:
+        /** Construct a BSONObj from data in the proper format.
+            @param ifree true if the BSONObj should free() the msgdata when
+            it destructs.
+        */
+        explicit BSONObj(const char *msgdata, bool ifree = false) {
+            init(msgdata, ifree);
+        }
+        BSONObj(const Record *r);
+        /** Construct an empty BSONObj -- that is, {}.
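[Editor's example -- an illustrative sketch, not part of the patch:]

    BSONObj e;                 // {}
    // e.isEmpty() is true; e.objsize() is 5 (the int32 length plus the EOO byte)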
+        */
+        BSONObj();
+        // defensive
+        ~BSONObj() { _objdata = 0; }
+
+        void appendSelfToBufBuilder(BufBuilder& b) const {
+            assert( objsize() );
+            b.appendBuf(reinterpret_cast<const void *>( objdata() ), objsize());
+        }
+
+        /** Readable representation of a BSON object in an extended JSON-style notation.
+            This is an abbreviated representation which might be used for logging.
+        */
+        string toString( bool isArray = false, bool full=false ) const;
+        void toString(StringBuilder& s, bool isArray = false, bool full=false ) const;
+
+        /** Properly formatted JSON string.
+            @param pretty if true we try to add some lf's and indentation
+        */
+        string jsonString( JsonStringFormat format = Strict, int pretty = 0 ) const;
+
+        /** note: addFields always adds _id even if not specified */
+        int addFields(BSONObj& from, set<string>& fields); /* returns n added */
+
+        /** returns # of top level fields in the object
+            note: iterates to count the fields
+        */
+        int nFields() const;
+
+        /** adds the field names to the fields set. does NOT clear it (appends). */
+        int getFieldNames(set<string>& fields) const;
+
+        /** returns an element with eoo() true if there is no match;
+            supports "." notation to reach into embedded objects
+        */
+        BSONElement getFieldDotted(const char *name) const;
+        /** returns an element with eoo() true if there is no match;
+            supports "." notation to reach into embedded objects
+        */
+        BSONElement getFieldDotted(const string& name) const {
+            return getFieldDotted( name.c_str() );
+        }
+
+        /** Like getFieldDotted(), but expands multikey arrays and returns all matching objects
+        */
+        void getFieldsDotted(const StringData& name, BSONElementSet &ret ) const;
+        /** Like getFieldDotted(), but returns first array encountered while traversing the
+            dotted fields of name.  The name variable is updated to represent field
+            names with respect to the returned element. */
+        BSONElement getFieldDottedOrArray(const char *&name) const;
+
+        /** Get the field of the specified name. eoo() is true on the returned
+            element if not found.
+        */
+        BSONElement getField(const StringData& name) const;
+
+        /** Get the field of the specified name. eoo() is true on the returned
+            element if not found.
+        */
+        BSONElement operator[] (const char *field) const {
+            return getField(field);
+        }
+
+        BSONElement operator[] (const string& field) const {
+            return getField(field);
+        }
+
+        BSONElement operator[] (int field) const {
+            StringBuilder ss;
+            ss << field;
+            string s = ss.str();
+            return getField(s.c_str());
+        }
+
+        /** @return true if field exists */
+        bool hasField( const char * name )const {
+            return ! getField( name ).eoo();
+        }
+
+        /** @return "" if DNE or wrong type */
+        const char * getStringField(const char *name) const;
+
+        /** @return subobject of the given name */
+        BSONObj getObjectField(const char *name) const;
+
+        /** @return INT_MIN if not present - does some type conversions */
+        int getIntField(const char *name) const;
+
+        /** @return false if not present */
+        bool getBoolField(const char *name) const;
+
+        /**
+           sets element field names to empty string
+           If a field in pattern is missing, it is omitted from the returned
+           object.
+        */
+        BSONObj extractFieldsUnDotted(BSONObj pattern) const;
+
+        /** extract items from object which match a pattern object.
+            e.g., if pattern is { x : 1, y : 1 }, builds an object with
+            x and y elements of this object, if they are present.
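[Editor's example -- an illustrative sketch, not part of the patch:]

    BSONObj o = BSON( "x" << 1 << "y" << "s" << "z" << 3 );
    BSONObj sub = o.extractFields( BSON( "x" << 1 << "y" << 1 ) );   // { x: 1, y: "s" }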
+            returns elements with original field names
+        */
+        BSONObj extractFields(const BSONObj &pattern , bool fillWithNull=false) const;
+
+        BSONObj filterFieldsUndotted(const BSONObj &filter, bool inFilter) const;
+
+        BSONElement getFieldUsingIndexNames(const char *fieldName, const BSONObj &indexKey) const;
+
+        /** @return the raw data of the object */
+        const char *objdata() const {
+            return _objdata;
+        }
+        /** @return total size of the BSON object in bytes */
+        int objsize() const {
+            return *(reinterpret_cast<const int*>(objdata()));
+        }
+
+        /** performs a cursory check on the object's size only. */
+        bool isValid();
+
+        /** @return true if this is a valid user doc
+            criteria: isValid() and no '.' or '$' in field names
+        */
+        bool okForStorage() const;
+
+        /** @return true if object is empty -- i.e., {} */
+        bool isEmpty() const {
+            return objsize() <= 5;
+        }
+
+        void dump() const;
+
+        /** Alternative output format */
+        string hexDump() const;
+
+        /** wo='well ordered'. fields must be in same order in each object.
+            Ordering is with respect to the signs of the elements
+            and allows ascending / descending key mixing.
+            @return <0 if l<r, 0 if l==r, >0 if l>r
+        */
+        int woCompare(const BSONObj& r, const Ordering &o,
+                      bool considerFieldName=true) const;
+
+        /** wo='well ordered'. fields must be in same order in each object.
+            Ordering is with respect to the signs of the elements
+            and allows ascending / descending key mixing.
+            @return <0 if l<r, 0 if l==r, >0 if l>r
+        */
+        int woCompare(const BSONObj& r, const BSONObj &ordering = BSONObj(),
+                      bool considerFieldName=true) const;
+
+
+        bool operator<( const BSONObj& other ) const { return woCompare( other ) < 0; }
+        bool operator<=( const BSONObj& other ) const { return woCompare( other ) <= 0; }
+        bool operator>( const BSONObj& other ) const { return woCompare( other ) > 0; }
+        bool operator>=( const BSONObj& other ) const { return woCompare( other ) >= 0; }
+
+        /**
+         * @param useDotted whether to treat sort key fields as possibly dotted and expand into them
+         */
+        int woSortOrder( const BSONObj& r , const BSONObj& sortKey , bool useDotted=false ) const;
+
+        /** This is "shallow equality" -- ints and doubles won't match.  for a
+            deep equality test use woCompare (which is slower).
+        */
+        bool woEqual(const BSONObj& r) const {
+            int os = objsize();
+            if ( os == r.objsize() ) {
+                return (os == 0 || memcmp(objdata(),r.objdata(),os)==0);
+            }
+            return false;
+        }
+
+        /** @return first field of the object */
+        BSONElement firstElement() const {
+            return BSONElement(objdata() + 4);
+        }
+
+        /** @return true if field exists in the object */
+        bool hasElement(const char *name) const;
+
+        /** Get the _id field from the object.  For good performance drivers should
+            assure that _id is the first element of the object; however, correct operation
+            is assured regardless.
+            @return true if found
+        */
+        bool getObjectID(BSONElement& e) const;
+
+        /** makes a copy of the object.
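[Editor's example -- an illustrative sketch, not part of the patch; 'view' is a
hypothetical BSONObj pointing into a transient buffer:]

    BSONObj stable = view.copy();   // heap-allocated, self-owned buffer that
                                    // outlives the message it was read from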
+        */
+        BSONObj copy() const;
+
+        /* make sure the data buffer is under the control of this BSONObj and not a remote buffer */
+        BSONObj getOwned() const{
+            if ( !isOwned() )
+                return copy();
+            return *this;
+        }
+        bool isOwned() const { return _holder.get() != 0; }
+
+        /** @return A hash code for the object */
+        int hash() const {
+            unsigned x = 0;
+            const char *p = objdata();
+            for ( int i = 0; i < objsize(); i++ )
+                x = x * 131 + p[i];
+            return (x & 0x7fffffff) | 0x8000000; // must be > 0
+        }
+
+        // Return a version of this object where top level elements of types
+        // that are not part of the bson wire protocol are replaced with
+        // string identifier equivalents.
+        // TODO Support conversion of element types other than min and max.
+        BSONObj clientReadable() const;
+
+        /** Return new object with the field names replaced by those in the
+            passed object. */
+        BSONObj replaceFieldNames( const BSONObj &obj ) const;
+
+        /** true unless corrupt */
+        bool valid() const;
+
+        /** @return an md5 value for this object. */
+        string md5() const;
+
+        bool operator==( const BSONObj& other ) const{
+            return woCompare( other ) == 0;
+        }
+
+        enum MatchType {
+            Equality = 0,
+            LT = 0x1,
+            LTE = 0x3,
+            GTE = 0x6,
+            GT = 0x4,
+            opIN = 0x8, // { x : { $in : [1,2,3] } }
+            NE = 0x9,
+            opSIZE = 0x0A,
+            opALL = 0x0B,
+            NIN = 0x0C,
+            opEXISTS = 0x0D,
+            opMOD = 0x0E,
+            opTYPE = 0x0F,
+            opREGEX = 0x10,
+            opOPTIONS = 0x11,
+            opELEM_MATCH = 0x12,
+            opNEAR = 0x13,
+            opWITHIN = 0x14,
+            opMAX_DISTANCE=0x15
+        };
+
+        /** add all elements of the object to the specified vector */
+        void elems(vector<BSONElement> &) const;
+        /** add all elements of the object to the specified list */
+        void elems(list<BSONElement> &) const;
+
+        /** add all values of the object to the specified vector. If type mismatches, exception. */
+        template <class T>
+        void Vals(vector<T> &) const;
+        /** add all values of the object to the specified list. If type mismatches, exception. */
+        template <class T>
+        void Vals(list<T> &) const;
+
+        /** add all values of the object to the specified vector. If type mismatches, skip. */
+        template <class T>
+        void vals(vector<T> &) const;
+        /** add all values of the object to the specified list. If type mismatches, skip. */
+        template <class T>
+        void vals(list<T> &) const;
+
+        friend class BSONObjIterator;
+        typedef BSONObjIterator iterator;
+        BSONObjIterator begin();
+
+    private:
+        class Holder {
+        public:
+            Holder( const char *objdata ) :
+                _objdata( objdata ) {
+            }
+            ~Holder() {
+                free((void *)_objdata);
+                _objdata = 0;
+            }
+        private:
+            const char *_objdata;
+        };
+        const char *_objdata;
+        boost::shared_ptr< Holder > _holder;
+        void init(const char *data, bool ifree) {
+            if ( ifree )
+                _holder.reset( new Holder( data ) );
+            _objdata = data;
+            if ( ! isValid() ){
+                StringBuilder ss;
+                int os = objsize();
+                ss << "Invalid BSONObj spec size: " << os << " (" << toHex( &os, 4 ) << ")";
+                try {
+                    BSONElement e = firstElement();
+                    ss << " first element:" << e.toString() << " ";
+                }
+                catch ( ... ){}
+                string s = ss.str();
+                massert( 10334 , s , 0 );
+            }
+        }
+    };
+    ostream& operator<<( ostream &s, const BSONObj &o );
+    ostream& operator<<( ostream &s, const BSONElement &e );
+
+    struct BSONArray : BSONObj {
+        // Don't add anything other than forwarding constructors!!!
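[Editor's example -- an illustrative sketch, not part of the patch; BSON_ARRAY is
the array analogue of the BSON macro used with the OR() helpers above:]

    BSONArray a = BSON_ARRAY( 1 << 2 << 3 );   // { "0": 1, "1": 2, "2": 3 }
    BSONObj   o = BSON( "nums" << a );         // stored with type Array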
+        BSONArray(): BSONObj() {}
+        explicit BSONArray(const BSONObj& obj): BSONObj(obj) {}
+    };
+
+}
diff -Nru mongodb-1.4.4/bson/bsonobjiterator.h mongodb-1.6.3/bson/bsonobjiterator.h
--- mongodb-1.4.4/bson/bsonobjiterator.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsonobjiterator.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,131 @@
+// bsonobjiterator.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <boost/preprocessor/cat.hpp> // like the ## operator but works with __LINE__
+
+namespace mongo {
+    /** iterator for a BSONObj
+
+        Note each BSONObj ends with an EOO element: so you will get more() on an empty
+        object, although next().eoo() will be true.
+
+        todo: we may want to make a more stl-like iterator interface for this
+        with things like begin() and end()
+    */
+    class BSONObjIterator {
+    public:
+        /** Create an iterator for a BSON object.
+        */
+        BSONObjIterator(const BSONObj& jso) {
+            int sz = jso.objsize();
+            if ( sz == 0 ) {
+                _pos = _theend = 0;
+                return;
+            }
+            _pos = jso.objdata() + 4;
+            _theend = jso.objdata() + sz;
+        }
+
+        BSONObjIterator( const char * start , const char * end ){
+            _pos = start + 4;
+            _theend = end;
+        }
+
+        /** @return true if more elements exist to be enumerated. */
+        bool moreWithEOO() {
+            return _pos < _theend;
+        }
+        bool more(){
+            return _pos < _theend && _pos[0];
+        }
+        /** @return the next element in the object. For the final element, element.eoo() will be true. */
+        BSONElement next( bool checkEnd = false ) {
+            assert( _pos < _theend );
+            BSONElement e( _pos, checkEnd ? (int)(_theend - _pos) : -1 );
+            _pos += e.size( checkEnd ? (int)(_theend - _pos) : -1 );
+            return e;
+        }
+
+        void operator++() { next(); }
+        void operator++(int) { next(); }
+
+        BSONElement operator*() {
+            assert( _pos < _theend );
+            return BSONElement(_pos, -1);
+        }
+
+    private:
+        const char* _pos;
+        const char* _theend;
+    };
+
+    class BSONObjIteratorSorted {
+    public:
+        BSONObjIteratorSorted( const BSONObj& o );
+
+        ~BSONObjIteratorSorted(){
+            assert( _fields );
+            delete[] _fields;
+            _fields = 0;
+        }
+
+        bool more(){
+            return _cur < _nfields;
+        }
+
+        BSONElement next(){
+            assert( _fields );
+            if ( _cur < _nfields )
+                return BSONElement( _fields[_cur++] );
+            return BSONElement();
+        }
+
+    private:
+        const char ** _fields;
+        int _nfields;
+        int _cur;
+    };
+
+/** Similar to BOOST_FOREACH
+ *
+ *  because the iterator is defined outside of the for, you must use {} around
+ *  the surrounding scope. Don't do this:
+ *
+ *    if (foo)
+ *        BSONForEach(e, obj)
+ *            doSomething(e);
+ *
+ *  but this is OK:
+ *
+ *    if (foo) {
+ *        BSONForEach(e, obj)
+ *            doSomething(e);
+ *    }
+ *
+ */
+
+#define BSONForEach(e, obj) \
+    BSONObjIterator BOOST_PP_CAT(it_,__LINE__)(obj); \
+    for ( BSONElement e; \
+            (BOOST_PP_CAT(it_,__LINE__).more() ? \
+             (e = BOOST_PP_CAT(it_,__LINE__).next(), true) : \
+             false) ; \
+          /*nothing*/ )
+
+}
diff -Nru mongodb-1.4.4/bson/bsontypes.h mongodb-1.6.3/bson/bsontypes.h
--- mongodb-1.4.4/bson/bsontypes.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/bsontypes.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,107 @@
+// bsontypes.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "util/misc.h"
+
+namespace bson { }
+
+namespace mongo {
+
+    using namespace std;
+
+    class BSONArrayBuilder;
+    class BSONElement;
+    class BSONObj;
+    class BSONObjBuilder;
+    class BSONObjBuilderValueStream;
+    class BSONObjIterator;
+    class Ordering;
+    class Record;
+    struct BSONArray; // empty subclass of BSONObj useful for overloading
+    struct BSONElementCmpWithoutField;
+
+    extern BSONObj maxKey;
+    extern BSONObj minKey;
+
+/**
+    the complete list of valid BSON types
+    see also bsonspec.org
+*/
+enum BSONType {
+    /** smaller than all other types */
+    MinKey=-1,
+    /** end of object */
+    EOO=0,
+    /** double precision floating point value */
+    NumberDouble=1,
+    /** character string, stored in utf8 */
+    String=2,
+    /** an embedded object */
+    Object=3,
+    /** an embedded array */
+    Array=4,
+    /** binary data */
+    BinData=5,
+    /** Undefined type */
+    Undefined=6,
+    /** ObjectId */
+    jstOID=7,
+    /** boolean type */
+    Bool=8,
+    /** date type */
+    Date=9,
+    /** null type */
+    jstNULL=10,
+    /** regular expression, a pattern with options */
+    RegEx=11,
+    /** deprecated / will be redesigned */
+    DBRef=12,
+    /** deprecated / use CodeWScope */
+    Code=13,
+    /** a programming language (e.g., Python) symbol */
+    Symbol=14,
+    /** javascript code that can execute on the database server, with SavedContext */
+    CodeWScope=15,
+    /** 32 bit signed integer */
+    NumberInt = 16,
+    /** Updated to a Date with value next OpTime on insert */
+    Timestamp = 17,
+    /** 64 bit integer */
+    NumberLong = 18,
+    /** max type that is not MaxKey */
+    JSTypeMax=18,
+    /** larger than all other types */
+    MaxKey=127
+};
+
+    /* subtypes of BinData.
+       bdtCustom and above are ones that the JS compiler understands, but are
+       opaque to the database.
+    */
+    enum BinDataType {
+        BinDataGeneral=0,
+        Function=1,
+        ByteArrayDeprecated=2, /* use BinDataGeneral instead */
+        bdtUUID = 3,
+        MD5Type=5,
+        bdtCustom=128
+    };
+
+}
diff -Nru mongodb-1.4.4/bson/inline_decls.h mongodb-1.6.3/bson/inline_decls.h
--- mongodb-1.4.4/bson/inline_decls.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/inline_decls.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,33 @@
+// inline.h
+
+/**
+* Copyright (C) 2010 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#if defined(__GNUC__)
+
+#define NOINLINE_DECL __attribute__((noinline))
+
+#elif defined(_MSC_VER)
+
+#define NOINLINE_DECL __declspec(noinline)
+
+#else
+
+#define NOINLINE_DECL
+
+#endif
diff -Nru mongodb-1.4.4/bson/oid.h mongodb-1.6.3/bson/oid.h
--- mongodb-1.4.4/bson/oid.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/oid.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,113 @@
+// oid.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "../util/hex.h"
+
+namespace mongo {
+
+#pragma pack(1)
+    /** Object ID type.
+        BSON objects typically have an _id field for the object id. This field should be the first
+        member of the object when present. class OID is a special type that is a 12 byte id which
+        is likely to be unique to the system.  You may also use other types for _id's.
+        When _id field is missing from a BSON object, on an insert the database may insert one
+        automatically in certain circumstances.
+
+        Warning: You must call OID::newState() after a fork().
+    */
+    class OID {
+        union {
+            struct{
+                long long a;
+                unsigned b;
+            };
+            unsigned char data[12];
+        };
+        static unsigned _machine;
+    public:
+        /** call this after a fork */
+        static void newState();
+
+        /** initialize to 'null' */
+        void clear() { a = 0; b = 0; }
+
+        const unsigned char *getData() const { return data; }
+
+        bool operator==(const OID& r) {
+            return a==r.a&&b==r.b;
+        }
+        bool operator!=(const OID& r) {
+            return a!=r.a||b!=r.b;
+        }
+
+        /** The object ID output as 24 hex digits. */
+        string str() const {
+            return toHexLower(data, 12);
+        }
+
+        string toString() const { return str(); }
+
+        static OID gen() { OID o; o.init(); return o; }
+
+        static unsigned staticMachine(){ return _machine; }
+        /**
+           sets the contents to a new oid / randomized value
+        */
+        void init();
+
+        /** Set to the hex string value specified. */
+        void init( string s );
+
+        /** Set to the min/max OID that could be generated at given timestamp. */
+        void init( Date_t date, bool max=false );
+
+        time_t asTimeT();
+        Date_t asDateT() { return asTimeT() * (long long)1000; }
+
+        bool isSet() const { return a || b; }
+
+        int compare( const OID& other ) const { return memcmp( data , other.data , 12 ); }
+
+        bool operator<( const OID& other ) const { return compare( other ) < 0; }
+    };
+#pragma pack()
+
+    ostream& operator<<( ostream &s, const OID &o );
+    inline StringBuilder& operator<< (StringBuilder& s, const OID& o) { return (s << o.str()); }
+
+    /** Formatting mode for generating JSON from BSON.
+        See the extended JSON documentation
+        for details.
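[Editor's example -- an illustrative sketch, not part of the patch; the exact
output text is indicative only:]

    BSONObj o = BSON( "d" << Date_t( 1285300000000ULL ) );
    string strict = o.jsonString( Strict );   // e.g. { "d" : { "$date" : 1285300000000 } }
    string tengen = o.jsonString( TenGen );   // e.g. { "d" : Date( 1285300000000 ) }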
+    */
+    enum JsonStringFormat {
+        /** strict RFC format */
+        Strict,
+        /** 10gen format, which is close to JS format.  This form is understandable by
+            javascript running inside the Mongo server via eval() */
+        TenGen,
+        /** Javascript JSON compatible */
+        JS
+    };
+
+    inline ostream& operator<<( ostream &s, const OID &o ) {
+        s << o.str();
+        return s;
+    }
+
+}
diff -Nru mongodb-1.4.4/bson/ordering.h mongodb-1.6.3/bson/ordering.h
--- mongodb-1.4.4/bson/ordering.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/ordering.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,66 @@
+// ordering.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace mongo {
+
+    /** A precomputation of a BSON key pattern.
+        The constructor is private to make conversion more explicit so we notice where we call make().
+        Over time we should push this up higher and higher.
+    */
+    class Ordering {
+        const unsigned bits;
+        const unsigned nkeys;
+        Ordering(unsigned b,unsigned n) : bits(b),nkeys(n) { }
+    public:
+        /** so, for key pattern { a : 1, b : -1 }
+            get(0) == 1
+            get(1) == -1
+        */
+        int get(int i) const {
+            return ((1 << i) & bits) ? -1 : 1;
+        }
+
+        // for woCompare...
+        unsigned descending(unsigned mask) const { return bits & mask; }
+
+        operator string() const {
+            StringBuilder buf(32);
+            for ( unsigned i=0; i<nkeys; i++)
+                buf.append( get(i) > 0 ? "+" : "-" );
+            return buf.str();
+        }
+
+        static Ordering make(const BSONObj& obj) {
+            unsigned b = 0;
+            BSONObjIterator k(obj);
+            unsigned n = 0;
+            while( 1 ) {
+                BSONElement e = k.next();
+                if( e.eoo() )
+                    break;
+                uassert( 13103, "too many compound keys", n <= 31 );
+                if( e.number() < 0 )
+                    b |= (1 << n);
+                n++;
+            }
+            return Ordering(b,n);
+        }
+    };
+
+}
diff -Nru mongodb-1.4.4/bson/README mongodb-1.6.3/bson/README
--- mongodb-1.4.4/bson/README 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/README 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,7 @@
+"BSON" stands for "binary JSON" - a binary storage format that is JSON inspired
+(and adds a couple extra types such as Date).
+
+This is the C++ implementation. Implementations which translate BSON<->JSON
+are available for most languages at bsonspec.org.
+
+
diff -Nru mongodb-1.4.4/bson/stringdata.h mongodb-1.6.3/bson/stringdata.h
--- mongodb-1.4.4/bson/stringdata.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/stringdata.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,64 @@
+// stringdata.h
+
+/* Copyright 2010 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
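[Editor's example -- an illustrative sketch, not part of the patch; it refers to
Ordering from ordering.h above, which packs each key's direction into one bit:]

    Ordering ord = Ordering::make( BSON( "a" << 1 << "b" << -1 ) );
    // ord.get(0) == 1, ord.get(1) == -1; ord.descending(2) != 0 since bit 1 is set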
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BSON_STRINGDATA_HEADER
+#define BSON_STRINGDATA_HEADER
+
+#include <string>
+#include <cstring>
+
+namespace mongo {
+
+    using std::string;
+
+    class StringData {
+    public:
+        StringData( const char* c )
+            : _data(c), _size((unsigned) strlen(c)) {}
+
+        StringData( const string& s )
+            : _data(s.c_str()), _size((unsigned) s.size()) {}
+
+        struct LiteralTag {};
+        template<size_t N>
+        StringData( const char (&val)[N], LiteralTag )
+            : _data(&val[0]), _size(N-1) {}
+
+        // Construct a StringData explicitly, for the case where the
+        // length of the string is already known. 'c' must be a
+        // pointer to a null-terminated string, and strlenOfc must be
+        // the length that std::strlen(c) would return, a.k.a. the
+        // index of the terminator in c.
+        StringData( const char* c, size_t strlenOfc )
+            : _data(c), _size((unsigned) strlenOfc) {}
+
+        const char* const data() const { return _data; }
+        const unsigned size() const { return _size; }
+
+    private:
+        // TODO - Hook this class up in the BSON machinery
+        // There are two assumptions here that we may want to review then.
+        //   '_data' *always* finishes with a null terminator
+        //   'size' does *not* account for the null terminator
+        // These assumptions may make it easier to minimize changes to existing code
+        const char* const _data;
+        const unsigned _size;
+    };
+
+} // namespace mongo
+
+#endif // BSON_STRINGDATA_HEADER
diff -Nru mongodb-1.4.4/bson/util/atomic_int.h mongodb-1.6.3/bson/util/atomic_int.h
--- mongodb-1.4.4/bson/util/atomic_int.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/util/atomic_int.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,99 @@
+// atomic_int.h
+// atomic wrapper for unsigned
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
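[Editor's example -- an illustrative sketch of StringData from stringdata.h above,
not part of the patch; takes() is a hypothetical consumer:]

    void takes( const StringData& s );
    takes( "literal" );               // implicit, strlen()-based
    string st = "abc";
    takes( st );                      // implicit from std::string, no copy
    takes( StringData( "abc", 3 ) );  // explicit, length known up front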
+ */
+
+#pragma once
+
+#if defined(_WIN32)
+# include <windows.h>
+#endif
+
+namespace mongo {
+
+    struct AtomicUInt{
+        AtomicUInt() : x(0) {}
+        AtomicUInt(unsigned z) : x(z) { }
+        volatile unsigned x;
+        operator unsigned() const {
+            return x;
+        }
+        inline AtomicUInt operator++();    // ++prefix
+        inline AtomicUInt operator++(int); // postfix++
+        inline AtomicUInt operator--();    // --prefix
+        inline AtomicUInt operator--(int); // postfix--
+    };
+
+#if defined(_WIN32)
+    AtomicUInt AtomicUInt::operator++(){
+        // InterlockedIncrement returns the new value
+        return InterlockedIncrement((volatile long*)&x); //long is 32bits in Win64
+    }
+    AtomicUInt AtomicUInt::operator++(int){
+        return InterlockedIncrement((volatile long*)&x)-1;
+    }
+    AtomicUInt AtomicUInt::operator--(){
+        return InterlockedDecrement((volatile long*)&x);
+    }
+    AtomicUInt AtomicUInt::operator--(int){
+        return InterlockedDecrement((volatile long*)&x)+1;
+    }
+#elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
+    // this is in GCC >= 4.1
+    AtomicUInt AtomicUInt::operator++(){
+        return __sync_add_and_fetch(&x, 1);
+    }
+    AtomicUInt AtomicUInt::operator++(int){
+        return __sync_fetch_and_add(&x, 1);
+    }
+    AtomicUInt AtomicUInt::operator--(){
+        return __sync_add_and_fetch(&x, -1);
+    }
+    AtomicUInt AtomicUInt::operator--(int){
+        return __sync_fetch_and_add(&x, -1);
+    }
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    // from boost 1.39 interprocess/detail/atomic.hpp
+
+    inline unsigned atomic_int_helper(volatile unsigned *x, int val){
+        int r;
+        asm volatile
+        (
+            "lock\n\t"
+            "xadd %1, %0":
+            "+m"( *x ), "=r"( r ): // outputs (%0, %1)
+            "1"( val ):            // inputs (%2 == %1)
+            "memory", "cc"         // clobbers
+        );
+        return r;
+    }
+    AtomicUInt AtomicUInt::operator++(){
+        return atomic_int_helper(&x, 1)+1;
+    }
+    AtomicUInt AtomicUInt::operator++(int){
+        return atomic_int_helper(&x, 1);
+    }
+    AtomicUInt AtomicUInt::operator--(){
+        return atomic_int_helper(&x, -1)-1;
+    }
+    AtomicUInt AtomicUInt::operator--(int){
+        return atomic_int_helper(&x, -1);
+    }
+#else
+# error "unsupported compiler or platform"
+#endif
+
+} // namespace mongo
diff -Nru mongodb-1.4.4/bson/util/builder.h mongodb-1.6.3/bson/util/builder.h
--- mongodb-1.4.4/bson/util/builder.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/bson/util/builder.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,238 @@
+/* builder.h */
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
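[Editor's example -- an illustrative sketch of AtomicUInt from atomic_int.h above,
not part of the patch:]

    AtomicUInt hits;
    hits++;                      // lock-free increment (xadd / __sync_* / Interlocked*)
    unsigned snapshot = hits;    // plain read through operator unsigned()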
+ */
+
+#pragma once
+
+#include <string>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../inline_decls.h"
+#include "../stringdata.h"
+
+namespace mongo {
+
+    class StringBuilder;
+
+    void msgasserted(int msgid, const char *msg);
+
+    class BufBuilder {
+    public:
+        BufBuilder(int initsize = 512) : size(initsize) {
+            if ( size > 0 ) {
+                data = (char *) malloc(size);
+                if( data == 0 )
+                    msgasserted(10000, "out of memory BufBuilder");
+            } else {
+                data = 0;
+            }
+            l = 0;
+        }
+        ~BufBuilder() {
+            kill();
+        }
+
+        void kill() {
+            if ( data ) {
+                free(data);
+                data = 0;
+            }
+        }
+
+        void reset( int maxSize = 0 ){
+            l = 0;
+            if ( maxSize && size > maxSize ){
+                free(data);
+                data = (char*)malloc(maxSize);
+                size = maxSize;
+            }
+        }
+
+        /* leave room for some stuff later */
+        char* skip(int n) { return grow(n); }
+
+        /* note this may be deallocated (realloced) if you keep writing. */
+        char* buf() { return data; }
+        const char* buf() const { return data; }
+
+        /* assume ownership of the buffer - you must then free() it */
+        void decouple() { data = 0; }
+
+        void appendChar(char j){
+            *((char*)grow(sizeof(char))) = j;
+        }
+        void appendNum(char j){
+            *((char*)grow(sizeof(char))) = j;
+        }
+        void appendNum(short j) {
+            *((short*)grow(sizeof(short))) = j;
+        }
+        void appendNum(int j) {
+            *((int*)grow(sizeof(int))) = j;
+        }
+        void appendNum(unsigned j) {
+            *((unsigned*)grow(sizeof(unsigned))) = j;
+        }
+        void appendNum(bool j) {
+            *((bool*)grow(sizeof(bool))) = j;
+        }
+        void appendNum(double j) {
+            *((double*)grow(sizeof(double))) = j;
+        }
+        void appendNum(long long j) {
+            *((long long*)grow(sizeof(long long))) = j;
+        }
+        void appendNum(unsigned long long j) {
+            *((unsigned long long*)grow(sizeof(unsigned long long))) = j;
+        }
+
+        void appendBuf(const void *src, size_t len) {
+            memcpy(grow((int) len), src, len);
+        }
+
+        void appendStr(const StringData &str , bool includeEOO = true ) {
+            const int len = str.size() + ( includeEOO ?
1 : 0 ); + memcpy(grow(len), str.data(), len); + } + + int len() const { + return l; + } + + void setlen( int newLen ){ + l = newLen; + } + + /* returns the pre-grow write position */ + inline char* grow(int by) { + int oldlen = l; + l += by; + if ( l > size ) { + grow_reallocate(); + } + return data + oldlen; + } + + int getSize() const { return size; } + + private: + /* "slow" portion of 'grow()' */ + void NOINLINE_DECL grow_reallocate(){ + int a = size * 2; + if ( a == 0 ) + a = 512; + if ( l > a ) + a = l + 16 * 1024; + if( a > 64 * 1024 * 1024 ) + msgasserted(10000, "BufBuilder grow() > 64MB"); + data = (char *) realloc(data, a); + size= a; + } + + char *data; + int l; + int size; + + friend class StringBuilder; + }; + +#if defined(_WIN32) +#pragma warning( disable : 4996 ) +#endif + + class StringBuilder { + public: + StringBuilder( int initsize=256 ) + : _buf( initsize ){ + } + +#define SBNUM(val,maxSize,macro) \ + int prev = _buf.l; \ + int z = sprintf( _buf.grow(maxSize) , macro , (val) ); \ + assert( z >= 0 ); \ + _buf.l = prev + z; \ + return *this; + + StringBuilder& operator<<( double x ){ + SBNUM( x , 25 , "%g" ); + } + StringBuilder& operator<<( int x ){ + SBNUM( x , 11 , "%d" ); + } + StringBuilder& operator<<( unsigned x ){ + SBNUM( x , 11 , "%u" ); + } + StringBuilder& operator<<( long x ){ + SBNUM( x , 22 , "%ld" ); + } + StringBuilder& operator<<( unsigned long x ){ + SBNUM( x , 22 , "%lu" ); + } + StringBuilder& operator<<( long long x ){ + SBNUM( x , 22 , "%lld" ); + } + StringBuilder& operator<<( unsigned long long x ){ + SBNUM( x , 22 , "%llu" ); + } + StringBuilder& operator<<( short x ){ + SBNUM( x , 8 , "%hd" ); + } + StringBuilder& operator<<( char c ){ + _buf.grow( 1 )[0] = c; + return *this; + } +#undef SBNUM + + void appendDoubleNice( double x ){ + int prev = _buf.l; + char * start = _buf.grow( 32 ); + int z = sprintf( start , "%.16g" , x ); + assert( z >= 0 ); + _buf.l = prev + z; + if( strchr(start, '.') == 0 && strchr(start, 'E') == 0 && strchr(start, 'N') == 0 ){ + write( ".0" , 2 ); + } + } + + void write( const char* buf, int len){ + memcpy( _buf.grow( len ) , buf , len ); + } + + void append( const StringData& str ){ + memcpy( _buf.grow( str.size() ) , str.data() , str.size() ); + } + StringBuilder& operator<<( const StringData& str ){ + append( str ); + return *this; + } + + // access + + void reset( int maxSize = 0 ){ + _buf.reset( maxSize ); + } + + std::string str(){ + return std::string(_buf.data, _buf.l); + } + + private: + BufBuilder _buf; + }; + +} // namespace mongo diff -Nru mongodb-1.4.4/bson/util/misc.h mongodb-1.6.3/bson/util/misc.h --- mongodb-1.4.4/bson/util/misc.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/bson/util/misc.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,94 @@ +/* @file util.h +*/ + +/* + * Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
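[Editor's example -- an illustrative sketch of StringBuilder from builder.h above,
not part of the patch:]

    StringBuilder ss;
    ss << "n=" << 42 << ' ' << 3.14;
    string s = ss.str();   // "n=42 3.14"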
+ */
+
+#pragma once
+
+#include <ctime>
+
+namespace mongo {
+
+    using namespace std;
+
+    inline void time_t_to_String(time_t t, char *buf) {
+#if defined(_WIN32)
+        ctime_s(buf, 32, &t);
+#else
+        ctime_r(&t, buf);
+#endif
+        buf[24] = 0; // don't want the \n
+    }
+
+    inline string time_t_to_String(time_t t = time(0) ){
+        char buf[64];
+#if defined(_WIN32)
+        ctime_s(buf, sizeof(buf), &t);
+#else
+        ctime_r(&t, buf);
+#endif
+        buf[24] = 0; // don't want the \n
+        return buf;
+    }
+
+    inline string time_t_to_String_no_year(time_t t) {
+        char buf[64];
+#if defined(_WIN32)
+        ctime_s(buf, sizeof(buf), &t);
+#else
+        ctime_r(&t, buf);
+#endif
+        buf[19] = 0;
+        return buf;
+    }
+
+    inline string time_t_to_String_short(time_t t) {
+        char buf[64];
+#if defined(_WIN32)
+        ctime_s(buf, sizeof(buf), &t);
+#else
+        ctime_r(&t, buf);
+#endif
+        buf[19] = 0;
+        if( buf[0] && buf[1] && buf[2] && buf[3] )
+            return buf + 4; // skip day of week
+        return buf;
+    }
+
+    struct Date_t {
+        // TODO: make signed (and look for related TODO's)
+        unsigned long long millis;
+        Date_t(): millis(0) {}
+        Date_t(unsigned long long m): millis(m) {}
+        operator unsigned long long&() { return millis; }
+        operator const unsigned long long&() const { return millis; }
+        string toString() const {
+            char buf[64];
+            time_t_to_String(millis/1000, buf);
+            return buf;
+        }
+    };
+
+    // Like strlen, but only scans up to n bytes.
+    // Returns -1 if no '\0' found.
+    inline int strnlen( const char *s, int n ) {
+        for( int i = 0; i < n; ++i )
+            if ( !s[ i ] )
+                return i;
+        return -1;
+    }
+}
diff -Nru mongodb-1.4.4/buildscripts/bb.py mongodb-1.6.3/buildscripts/bb.py
--- mongodb-1.4.4/buildscripts/bb.py 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/buildscripts/bb.py 2010-09-24 10:02:42.000000000 -0700
@@ -16,7 +16,7 @@
     print( "excpted version [" + m + "]" )
 
     from subprocess import Popen, PIPE
-    diff = Popen( [ "git", "diff", "origin/v1.4" ], stdout=PIPE ).communicate()[ 0 ]
+    diff = Popen( [ "git", "diff", "origin/v1.2" ], stdout=PIPE ).communicate()[ 0 ]
     if len(diff) > 0:
         print( diff )
         raise Exception( "build bot broken?" )
diff -Nru mongodb-1.4.4/buildscripts/buildboost64.bat mongodb-1.6.3/buildscripts/buildboost64.bat
--- mongodb-1.4.4/buildscripts/buildboost64.bat 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/buildscripts/buildboost64.bat 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,61 @@
+@echo off
+
+rem 64 bit version
+rem address-model=64
+
+rem run
+rem   bin\bjam --clean
+rem if you switch compilers etc.
+
+cls
+echo This script builds the (64 bit) boost libs that MongoDB requires on Windows.
+echo We assume boost source is in machine's \boost directory.
+echo You can get boost at www.boost.org.
+echo .
+echo Note: you will want boost v1.42 or higher with VS2010.
+echo .
+echo We assume you have bjam. To build bjam:
+echo   cd tools\jam\src
+echo   build.bat
+echo .
+
+cd \boost
+echo bin\bjam --version
+bin\bjam --version
+
+echo .
+echo .
+echo .
+echo About to build release libraries
+pause
+cls
+bin\bjam --build-dir=c:\temp\boost64 address-model=64 variant=release runtime-link=static link=static --with-filesystem --with-thread --with-date_time --with-program_options --layout=versioned threading=multi toolset=msvc
+echo .
+echo .
+echo .
+echo About to try to move libs from /boost/stage/lib to /boost/lib/
+pause
+cls
+rem bjam makes extra copies without the ver #; we kill those:
+del stage\lib\*s.lib
+move stage\lib\* lib\
+
+echo .
+echo .
+echo .
+echo About to build debug libraries +pause +cls +bin\bjam --build-dir=c:\temp\boost64 address-model=64 variant=debug --with-filesystem --with-thread --with-date_time --with-program_options --layout=versioned threading=multi toolset=msvc + +echo . +echo . +echo . +echo About to try to move libs from /boost/stage/lib to /boost/lib/ +pause +cls +rem bjam makes extra copies without the ver #; we kill those: +del stage\lib\*-gd.lib +move stage\lib\* lib\ + +echo Done - try running "dir \boost\lib\" diff -Nru mongodb-1.4.4/buildscripts/buildboost.bat mongodb-1.6.3/buildscripts/buildboost.bat --- mongodb-1.4.4/buildscripts/buildboost.bat 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/buildboost.bat 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,54 @@ +@echo off + +cls +echo This script builds the boost libs that MongoDB requires on Windows. +echo We assume boost source is in machine's \boost directory. +echo You can get boost at www.boost.org. +echo . +echo Note: you will want boost v1.42 or higher with VS2010. +echo . +echo We assume you have bjam. To build bjam: +echo cd tools\jam\src +echo build.bat +echo . + +cd \boost +echo bin\bjam --version +bin\bjam --version + +echo . +echo . +echo . +echo About to build release libraries +pause +cls +bin\bjam variant=release runtime-link=static link=static --with-filesystem --with-thread --with-date_time --with-program_options --layout=versioned threading=multi toolset=msvc +echo . +echo . +echo . +echo About to try to move libs from /boost/stage/lib to /boost/lib/ +pause +cls +rem bjam makes extra copies without the ver #; we kill those: +del stage\lib\*s.lib +move stage\lib\* lib\ + +echo . +echo . +echo . +echo About to build debug libraries +pause +cls +bin\bjam variant=debug --with-filesystem --with-thread --with-date_time --with-program_options --layout=versioned threading=multi toolset=msvc + +echo . +echo . +echo . 
+echo About to try to move libs from /boost/stage/lib to /boost/lib/
+pause
+cls
+rem bjam makes extra copies without the ver #; we kill those:
+del stage\lib\*-gd.lib
+move stage\lib\* lib\
+
+echo Done - try running "dir \boost\lib\"
diff -Nru mongodb-1.4.4/buildscripts/cleanbb.py mongodb-1.6.3/buildscripts/cleanbb.py
--- mongodb-1.4.4/buildscripts/cleanbb.py 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/buildscripts/cleanbb.py 2010-09-24 10:02:42.000000000 -0700
@@ -3,17 +3,39 @@
 import os
 import utils
 import time
+from optparse import OptionParser
+
+cwd = os.getcwd();
+if cwd.find("buildscripts" ) > 0 :
+    cwd = cwd.partition( "buildscripts" )[0]
+
+print( "cwd [" + cwd + "]" )
+
+def shouldKill( c ):
+    if c.find( cwd ) >= 0:
+        return True
+
+    if ( c.find( "buildbot" ) >= 0 or c.find( "slave" ) >= 0 ) and c.find( "/mongo/" ) >= 0:
+        return True
+
+    return False
 
 def killprocs( signal="" ):
-    cwd = os.getcwd();
-    if cwd.find("buildscripts" ) > 0 :
-        cwd = cwd.partition( "buildscripts" )[0]
     killed = 0
-    for x in utils.getprocesslist():
+    l = utils.getprocesslist()
+    print( "num procs:" + str( len( l ) ) )
+    if len(l) == 0:
+        print( "no procs" )
+        try:
+            print( execsys( "/sbin/ifconfig -a" ) )
+        except Exception,e:
+            print( "can't get interfaces " + str( e ) )
+
+    for x in l:
         x = x.lstrip()
-        if x.find( cwd ) < 0:
+        if not shouldKill( x ):
            continue
 
         pid = x.partition( " " )[0]
@@ -24,20 +46,31 @@
     return killed
 
-def cleanup( root ):
+def cleanup( root , nokill ):
+    if nokill:
+        print "nokill requested, not killing anybody"
+    else:
+        if killprocs() > 0:
+            time.sleep(3)
+            killprocs("-9")
+
     # delete all regular files, directories can stay
     # NOTE: if we delete directories later, we can't delete diskfulltest
     for ( dirpath , dirnames , filenames ) in os.walk( root , topdown=False ):
         for x in filenames:
-            os.remove( dirpath + "/" + x )
+            foo = dirpath + "/" + x
+            print( "removing: " + foo )
+            os.remove( foo )
 
-    if killprocs() > 0:
-        time.sleep(3)
-        killprocs("-9")
 
 if __name__ == "__main__":
+    parser = OptionParser(usage="read the script")
+    parser.add_option("--nokill", dest='nokill', default=False, action='store_true')
+    (options, args) = parser.parse_args()
+
     root = "/data/db/"
-    if len( sys.argv ) > 1:
-        root = sys.argv[1]
-    cleanup( root )
+    if len(args) > 0:
+        root = args[0]
+
+    cleanup( root , options.nokill )
diff -Nru mongodb-1.4.4/buildscripts/confluence_export.py mongodb-1.6.3/buildscripts/confluence_export.py
--- mongodb-1.4.4/buildscripts/confluence_export.py 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/buildscripts/confluence_export.py 2010-09-24 10:02:42.000000000 -0700
@@ -14,10 +14,15 @@
 import subprocess
 import sys
 import urllib2
+sys.path[0:0] = [""]
+import simples3
 
 from suds.client import Client
 
-SOAP_URI = "http://mongodb.onconfluence.com/rpc/soap-axis/confluenceservice-v1?wsdl"
+import settings
+
+HTML_URI = "http://mongodb.onconfluence.com/rpc/soap-axis/confluenceservice-v1?wsdl"
+PDF_URI = "http://www.mongodb.org/rpc/soap-axis/pdfexport?wsdl"
 USERNAME = "soap"
 PASSWORD = "soap"
 AUTH_URI = "http://www.mongodb.org/login.action?os_authType=basic"
@@ -25,12 +30,18 @@
 TMP_FILE = "confluence-tmp.zip"
 
-def export_and_get_uri():
-    client = Client(SOAP_URI)
+def export_html_and_get_uri():
+    client = Client(HTML_URI)
     auth = client.service.login(USERNAME, PASSWORD)
     return client.service.exportSpace(auth, "DOCS", "TYPE_HTML")
 
+def export_pdf_and_get_uri():
+    client = Client(PDF_URI)
+    auth = client.service.login(USERNAME, PASSWORD)
+    return client.service.exportSpace(auth, "DOCS")
"DOCS") + + def login_and_download(docs): cookie_jar = cookielib.CookieJar() cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar) @@ -69,11 +80,21 @@ os.symlink(os.path.abspath(target), os.path.abspath(current)) +def write_to_s3(pdf): + s3 = simples3.S3Bucket(settings.bucket, settings.id, settings.key) + name = "docs/mongodb-docs-%s.pdf" % datetime.date.today() + s3.put(name, pdf, acl="public-read") + + def main(dir): + # HTML rmdir(TMP_DIR) - extract_to_dir(login_and_download(export_and_get_uri()), TMP_DIR) + extract_to_dir(login_and_download(export_html_and_get_uri()), TMP_DIR) overwrite("%s/DOCS/" % TMP_DIR, dir) + # PDF + write_to_s3(login_and_download(export_pdf_and_get_uri()).read()) + if __name__ == "__main__": try: diff -Nru mongodb-1.4.4/buildscripts/distmirror.py mongodb-1.6.3/buildscripts/distmirror.py --- mongodb-1.4.4/buildscripts/distmirror.py 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/distmirror.py 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,139 @@ +#!/usr/bin/python + +# Download mongodb stuff (at present builds, sources, docs, but not +# drivers). + +# Usage: [directory] # directory defaults to cwd. + +# FIXME: this script is fairly sloppy. +import sys +import os +import urllib2 +import time +import hashlib +import warnings + +written_files = [] +def get(url, filename): + # A little safety check. + if filename in written_files: + raise Exception('not overwriting file %s (already written in this session)' % filename) + else: + written_files.append(filename) + print "downloading %s to %s" % (url, filename) + open(filename, 'w').write(urllib2.urlopen(url).read()) + + +def checkmd5(md5str, filename): + m = hashlib.md5() + m.update(open(filename, 'rb').read()) + d = m.hexdigest() + if d != md5str: + warnings.warn("md5sum mismatch for file %s: wanted %s; got %s" % (filename, md5str, d)) + +osarches=(("osx", ("i386", "i386-tiger", "x86_64"), ("tgz", )), + ("linux", ("i686", "x86_64"), ("tgz", )), + ("win32", ("i386", "x86_64"), ("zip", )), + ("sunos5", ("i86pc", "x86_64"), ("tgz", )), + ("src", ("src", ), ("tar.gz", "zip")), ) + +# KLUDGE: this will need constant editing. +versions = ("1.4.2", "1.5.1", "latest") + +url_format = "http://downloads.mongodb.org/%s/mongodb-%s-%s.%s" +filename_format = "mongodb-%s-%s.%s" + +def core_server(): + for version in versions: + for (os, architectures, archives) in osarches: + for architecture in architectures: + for archive in archives: + osarch = os + '-' + architecture if architecture != 'src' else 'src' + # ugh. + if architecture == 'src' and version == 'latest': + if archive == 'tar.gz': + archive2 = 'tarball' + elif archive == 'zip': + archive2 == 'zipball' + url = "http://github.com/mongodb/mongo/"+archive2+"/master" + version2 = "master" + else: + version2 = version if architecture != 'src' else 'r'+version + url = url_format % (os, osarch, version2, archive) + # ugh ugh + md5url = url+'.md5' if architecture != 'src' else None + filename = filename_format % (osarch, version2, archive) + get(url, filename) + if md5url: + print "fetching md5 url " + md5url + md5str = urllib2.urlopen(md5url).read() + checkmd5(md5str, filename) + +def drivers(): + # Drivers... FIXME: drivers. 
+ driver_url_format = "http://github.com/mongodb/mongo-%s-driver/%s/%s" + driver_filename_format = "mongo-%s-driver-%s.%s" + drivers=(("python", ("1.6", "master"), ("zipball", "tarball"), None), + ("ruby", ("0.20", "master"), ("zipball", "tarball"), None), + ("c", ("v0.1", "master"), ("zipball", "tarball"), None), + # FIXME: PHP, Java, and Csharp also have zips and jars of + # precompiled relesaes. + ("php", ("1.0.6", "master"), ("zipball", "tarball"), None), + ("java", ("r1.4", "r2.0rc1", "master"), ("zipball", "tarball"), None), + # And Csharp is in a different github place, too. + ("csharp", ("0.82.2", "master"), ("zipball", "tarball"), + "http://github.com/samus/mongodb-%s/%s/%s"), + ) + + for (lang, releases, archives, url_format) in drivers: + for release in releases: + for archive in archives: + url = (url_format if url_format else driver_url_format) % (lang, archive, release) + if archive == 'zipball': + extension = 'zip' + elif archive == 'tarball': + extension = 'tgz' + else: + raise Exception('unknown archive format %s' % archive) + filename = driver_filename_format % (lang, release, extension) + get(url, filename) + # ugh ugh ugh + if lang == 'csharp' and release != 'master': + url = 'http://github.com/downloads/samus/mongodb-csharp/MongoDBDriver-Release-%.zip' % (release) + filename = 'MongoDBDriver-Release-%.zip' % (release) + get(url, filename) + if lang == 'java' and release != 'master': + get('http://github.com/downloads/mongodb/mongo-java-driver/mongo-%s.jar' % (release), 'mongo-%s.jar' % (release)) + # I have no idea what's going on with the PHP zipfiles. + if lang == 'php' and release == '1.0.6': + get('http://github.com/downloads/mongodb/mongo-php-driver/mongo-1.0.6-php5.2-osx.zip', 'mongo-1.0.6-php5.2-osx.zip') + get('http://github.com/downloads/mongodb/mongo-php-driver/mongo-1.0.6-php5.3-osx.zip', 'mongo-1.0.6-php5.3-osx.zip') + +def docs(): + # FIXME: in principle, the doc PDFs could be out of date. + docs_url = time.strftime("http://downloads.mongodb.org/docs/mongodb-docs-%Y-%m-%d.pdf") + docs_filename = time.strftime("mongodb-docs-%Y-%m-%d.pdf") + get(docs_url, docs_filename) + +def extras(): + # Extras + extras = ("http://media.mongodb.org/zips.json", ) + for extra in extras: + if extra.rfind('/') > -1: + filename = extra[extra.rfind('/')+1:] + else: + raise Exception('URL %s lacks a slash?' % extra) + get(extra, filename) + +if len(sys.argv) > 1: + dir=sys.argv[1] + os.makedirs(dir) + os.chdir(dir) + +print """NOTE: the md5sums for all the -latest tarballs are out of +date. You will probably see warnings as this script runs. (If you +don't, feel free to delete this note.)""" +core_server() +drivers() +docs() +extras() diff -Nru mongodb-1.4.4/buildscripts/docs.py mongodb-1.6.3/buildscripts/docs.py --- mongodb-1.4.4/buildscripts/docs.py 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/docs.py 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,120 @@ +"""Build the C++ client docs and the MongoDB server docs. 
+""" + +from __future__ import with_statement +import os +import shutil +import socket +import subprocess +import time +import urllib2 + +import markdown + + +def clean_dir(dir): + try: + shutil.rmtree(dir) + except: + pass + os.makedirs(dir) + + +def convert_dir(source, dest): + clean_dir(dest) + + for x in os.listdir(source + "/"): + if not x.endswith(".md"): + continue + + with open("%s/%s" % (source, x)) as f: + raw = f.read() + + html = markdown.markdown(raw) + print(x) + + with open("%s/%s" % (dest, x.replace(".md", ".html")), 'w') as o: + o.write(html) + + +def check_mongo(): + sock = socket.socket() + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + sock.settimeout(1) + sock.connect(("localhost", 31999)) + sock.close() + +def did_mongod_start(timeout=20): + while timeout > 0: + time.sleep(1) + try: + check_mongo() + return True + except Exception,e: + print e + timeout = timeout - 1 + return False + +def stop(proc): + try: + proc.terminate() + except AttributeError: + os.kill(proc.pid, 15) + +def commands_list(out): + clean_dir("dummy_data_dir") + with open("/dev/null") as null: + try: + p = subprocess.Popen(["./mongod", "--dbpath", "dummy_data_dir", + "--port", "31999", "--rest"], stdout=null, stderr=null) + except: + print "No mongod? Skipping..." + return + if not did_mongod_start(): + print "Slow mongod? Skipping..." + stop(p) + return + print "Started mongod" + + with open(out, "w") as f: + f.write("") + f.write(urllib2.urlopen("http://localhost:32999/_commands").read()) + + print "Stopping mongod" + stop(p) + +def gen_cplusplus(dir): + clean_dir(dir) + clean_dir("docs/doxygen") + + # Too noisy... + with open("/dev/null") as null: + subprocess.call(["doxygen", "doxygenConfig"], stdout=null, stderr=null) + + os.rename("docs/doxygen/html", dir) + + +def version(): + """Get the server version from doxygenConfig. + """ + with open("doxygenConfig") as f: + for line in f.readlines(): + if line.startswith("PROJECT_NUMBER"): + return line.split("=")[1].strip() + + +def main(): + v = version() + print("Generating server docs in docs/html/internal/%s" % v) + convert_dir("docs", "docs/html/internal/%s" % v) + print("Generating commands list") + commands_list("docs/html/internal/%s/commands.html" % v) + shutil.rmtree("dummy_data_dir") + print("Generating C++ docs in docs/html/cplusplus/%s" % v) + gen_cplusplus("docs/html/cplusplus/%s" % v) + + +if __name__ == "__main__": + main() + + diff -Nru mongodb-1.4.4/buildscripts/errorcodes.py mongodb-1.6.3/buildscripts/errorcodes.py --- mongodb-1.4.4/buildscripts/errorcodes.py 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/buildscripts/errorcodes.py 2010-09-24 10:02:42.000000000 -0700 @@ -3,23 +3,24 @@ import os import sys import re +import utils def getAllSourceFiles( arr=None , prefix="." ): if arr is None: arr = [] for x in os.listdir( prefix ): - if x.startswith( "." ) or x.startswith( "pcre-" ) or x.startswith( "32bit" ) or x.startswith( "mongodb-" ): + if x.startswith( "." 
) or x.startswith( "pcre-" ) or x.startswith( "32bit" ) or x.startswith( "mongodb-" ) or x.startswith("debian") or x.startswith( "mongo-cxx-driver" ): continue full = prefix + "/" + x - if os.path.isdir( full ): + if os.path.isdir( full ) and not os.path.islink( full ): getAllSourceFiles( arr , full ) else: if full.endswith( ".cpp" ) or full.endswith( ".h" ) or full.endswith( ".c" ): arr.append( full ) return arr - + assertNames = [ "uassert" , "massert" ] def assignErrorCodes(): @@ -43,6 +44,8 @@ out.close() +codes = [] + def readErrorCodes( callback ): ps = [ re.compile( "([um]asser(t|ted)) *\( *(\d+)" ) , re.compile( "(User|Msg)Exceptio(n)\( *(\d+)" ) @@ -52,6 +55,7 @@ for line in open( x ): for p in ps: for m in p.findall( line ): + codes.append( ( x , lineNum , line , m[2] ) ) callback( x , lineNum , line , m[2] ) lineNum = lineNum + 1 @@ -78,8 +82,57 @@ readErrorCodes( checkDups ) return len( errors ) == 0 +def getBestMessage( err , start ): + err = err.partition( start )[2] + if not err: + return "" + err = err.partition( "\"" )[2] + if not err: + return "" + err = err.rpartition( "\"" )[0] + if not err: + return "" + return err + +def genErrorOutput(): + + g = utils.getGitVersion() + + if os.path.exists( "docs/errors.md" ): + i = open( "docs/errors.md" , "r" ) + + + out = open( "docs/errors.md" , 'w' ) + out.write( "MongoDB Error Codes\n==========\n\n\n" ) + + prev = "" + seen = {} + + codes.sort( key=lambda x: x[0]+"-"+x[3] ) + for f,l,line,num in codes: + if num in seen: + continue + seen[num] = True + + if f.startswith( "./" ): + f = f[2:] + + if f != prev: + out.write( "\n\n" ) + out.write( f + "\n----\n" ) + prev = f + + url = "http://github.com/mongodb/mongo/blob/" + g + "/" + f + "#L" + str(l) + + out.write( "* " + str(num) + " [code](" + url + ") " + getBestMessage( line , str(num) ) + "\n" ) + + out.write( "\n" ) + out.close() + if __name__ == "__main__": ok = checkErrorCodes() print( "ok:" + str( ok ) ) print( "next: " + str( getNextCode() ) ) + if ok: + genErrorOutput() diff -Nru mongodb-1.4.4/buildscripts/makealldists.py mongodb-1.6.3/buildscripts/makealldists.py --- mongodb-1.4.4/buildscripts/makealldists.py 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/makealldists.py 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,289 @@ +#!/usr/bin/python + +from __future__ import with_statement +import subprocess +import sys +import os +import time +import tempfile +import errno +import glob +import shutil +import settings +import simples3 + +def s3bucket(): + return simples3.S3Bucket(settings.bucket, settings.id, settings.key) + +def s3cp (bucket, filename, s3name): + defaultacl="public-read" + print "putting %s to %s" % (filename, s3name) + bucket.put(s3name, open(filename, "rb").read(), acl=defaultacl) + +def pushrepo(repodir): + files=subprocess.Popen(['find', repodir, '-type', 'f'], stdout=subprocess.PIPE).communicate()[0][:-1].split('\n') + bucket=s3bucket() + olddebs=[t[0] for t in bucket.listdir(prefix='distros/') if t[0].endswith('.deb')] + newdebs=[] + for fn in files: + if len(fn) == 0: + continue + tail = fn[len(repodir):] + # Note: be very careful not to produce s3names containing + # sequences of repeated slashes: s3 doesn't treat a////b as + # equivalent to a/b. 
+        s3name1='distros-archive/'+time.strftime('%Y%m%d')+tail
+        s3name2='distros'+tail
+        s3cp(bucket, fn, s3name1)
+        s3cp(bucket, fn, s3name2)
+        if s3name1.endswith('.deb'):
+            newdebs.append(s3name1)
+    # FIXME: we ought to clean out old debs eventually, but this will
+    # blow away too much if we're trying to push a subset of what's
+    # supposed to be available.
+    #[bucket.delete(deb) for deb in set(olddebs).difference(set(newdebs))]
+
+def cat (inh, outh):
+    inh.seek(0)
+    for line in inh:
+        outh.write(line)
+    inh.close()
+
+# This generates all tuples from a mixed-radix counting system, essentially.
+def gen(listlist):
+    dim=len(listlist)
+    a=[0 for ignore in listlist]
+    while True:
+        yield [listlist[i][a[i]] for i in range(dim)]
+        a[0]+=1
+        for j in range(dim):
+            if a[j] == len(listlist[j]):
+                if j<dim-1:
+                    a[j+1]+=1
+                else:
+                    return
+                a[j]=0
+
+def dirify(n):
+    return (n if n[-1:] =='/' else n+'/')
+
+def makedirs(f):
+    try:
+        os.makedirs(f)
+    except OSError: # as exc, for python >2.5
+        exc=sys.exc_value
+        if exc.errno == errno.EEXIST:
+            pass
+        else:
+            raise exc
+
+
+
+# This is a fairly peculiar thing to want to do, but our build process
+# creates several apt repositories for each mongo version we build on
+# any given Debian/Ubuntu release.  To merge repositories together, we
+# must concatenate the Packages.gz files.
+def merge_directories_concatenating_conflicts (target, sources):
+    print sources
+    target = dirify(target)
+    for source in sources:
+        source = dirify(source)
+        files = subprocess.Popen(["find", source, "-type", "f"], stdout=subprocess.PIPE).communicate()[0].split('\n')
+        for f in files:
+            if f == '':
+                continue
+            rel = f[len(source):]
+            o=target+rel
+            makedirs(os.path.dirname(o))
+            with open(f) as inh:
+                with open(target+rel, "a") as outh:
+                    outh.write(inh.read())
+
+
+def parse_mongo_version_spec(spec):
+    l = spec.split(':')
+    if len(l) == 1:
+        l+=['','']
+    elif len(l) == 2:
+        l+=['']
+    return l
+
+def logfh(distro, distro_version, arch):
+    prefix = "%s-%s-%s.log." % (distro, distro_version, arch)
+    # This is a NamedTemporaryFile mostly so that I can tail(1) them
+    # as we go.
+    return tempfile.NamedTemporaryFile("w+b", -1, prefix=prefix)
+
+def spawn(distro, distro_version, arch, spec, directory, opts):
+    argv = ["python", "makedist.py"] + opts + [ directory, distro, distro_version, arch ] + [ spec ]
+#    cmd = "mkdir -p %s; cd %s; touch foo.deb; echo %s %s %s %s %s | tee Packages " % ( directory, directory, directory, distro, distro_version, arch, mongo_version )
+#    print cmd
+#    argv = ["sh", "-c", cmd]
+    fh = logfh(distro, distro_version, arch)
+    print >> fh, "Running %s" % argv
+    # it's often handy to be able to run these things at the shell
+    # manually.  FIXME: this ought to be slightly less than thoroughly
+    # ignorant of quoting issues (as it is now).
+    print >> fh, " ".join(argv)
+    fh.flush()
+    proc = subprocess.Popen(argv, stdin=None, stdout=fh, stderr=fh)
+    return (proc, fh, distro, distro_version, arch, spec)
+
+def win(name, logfh, winfh):
+    logfh.seek(0)
+    print >> winfh, "=== Winner %s ===" % name
+    cat(logfh, winfh)
+    print >> winfh, "=== End winner %s ===" % name
+
+def lose(name, logfh, losefh):
+    logfh.seek(0)
+    print >> losefh, "=== Loser %s ===" % name
+    cat(logfh, losefh)
+    print >> losefh, "=== End loser %s ===" % name
+
+def wait(procs, winfh, losefh, winners, losers):
+    print "."
+    sys.stdout.flush()
+    try:
+        (pid, stat) = os.wait()
+    except OSError, err:
+        print >> sys.stderr, "This shouldn't happen."
+ print >> sys.stderr, err + next + if pid: + [tup] = [tup for tup in procs if tup[0].pid == pid] + (proc, logfh, distro, distro_version, arch, spec) = tup + procs.remove(tup) + name = "%s %s %s" % (distro, distro_version, arch) + if os.WIFEXITED(stat): + if os.WEXITSTATUS(stat) == 0: + win(name, logfh, winfh) + winners.append(name) + else: + lose(name, logfh, losefh) + losers.append(name) + if os.WIFSIGNALED(stat): + lose(name, logfh, losefh) + losers.append(name) + + + +def __main__(): + # FIXME: getopt & --help. + print " ".join(sys.argv) + branches = sys.argv[-1] + makedistopts = sys.argv[1:-1] + + # Output from makedist.py goes here. + outputroot=tempfile.mkdtemp() + repodir=tempfile.mkdtemp() + + print "makedist output under: %s\ncombined repo: %s\n" % (outputroot, repodir) + sys.stdout.flush() + # Add more dist/version/architecture tuples as they're supported. + dists = (("ubuntu", "10.4"), + ("ubuntu", "9.10"), + ("ubuntu", "9.4"), + ("ubuntu", "8.10"), + ("debian", "5.0"), + ("centos", "5.4"), + ("fedora", "11"), + ("fedora", "12")) + arches = ("x86", "x86_64") +# mongos = branches.split(',') + # Run a makedist for each distro/version/architecture tuple above. + winners = [] + losers = [] + winfh=tempfile.TemporaryFile() + losefh=tempfile.TemporaryFile() + procs = [] + count = 0 + for ((distro, distro_version), arch, spec) in gen([dists, arches, [branches]]): + # FIXME: now x86 fedoras on RackSpace circa 04/10. + if distro == "fedora" and arch == "x86": + continue + count+=1 + opts = makedistopts + if distro in ["debian", "ubuntu"]: + outputdir = "%s/deb/%s" % (outputroot, distro) + elif distro in ["centos", "fedora", "redhat"]: + outputdir = "%s/rpm/%s/%s/os" % (outputroot, distro, distro_version) + else: + raise Exception("unsupported distro %s" % distro) + #opts += ["--subdirs"] + + procs.append(spawn(distro, distro_version, arch, spec, outputdir, opts)) + + if len(procs) == 8: + wait(procs, winfh, losefh, winners, losers) + + while procs: + wait(procs, winfh, losefh, winners, losers) + + winfh.seek(0) + losefh.seek(0) + nwinners=len(winners) + nlosers=len(losers) + print "%d winners; %d losers" % (nwinners, nlosers) + cat(winfh, sys.stdout) + cat(losefh, sys.stdout) + print "%d winners; %d losers" % (nwinners, nlosers) + if count == nwinners + nlosers: + print "All jobs accounted for" +# return 0 + else: + print "Lost some jobs...?" + return 1 + + sys.stdout.flush() + sys.stderr.flush() + + # this is sort of ridiculous, but the outputs from rpmbuild look + # like RPM/, but the repo wants to look like + # /RPM. + for dist in os.listdir(outputroot+'/rpm'): + if dist in ["centos", "fedora", "redhat"]: + distdir="%s/rpm/%s" % (outputroot, dist) + rpmdirs = subprocess.Popen(["find", distdir, "-type", "d", "-a", "-name", "RPMS"], stdout=subprocess.PIPE).communicate()[0].split('\n')[:-1] + for rpmdir in rpmdirs: + for arch in os.listdir(rpmdir): + archdir="%s/../%s" % (rpmdir, arch) + os.mkdir(archdir) + os.rename("%s/%s" % (rpmdir, arch), "%s/RPMS" % (archdir,)) + os.rmdir(rpmdir) + + + for flavor in os.listdir(outputroot): + argv=["python", "mergerepositories.py", flavor, "%s/%s" % (outputroot, flavor), repodir] + print "running %s" % argv + print " ".join(argv) + r = subprocess.Popen(argv).wait() + if r != 0: + raise Exception("mergerepositories.py exited %d" % r) + print repodir + pushrepo(repodir) + shutil.rmtree(outputroot) + shutil.rmtree(repodir) + + return 0 + + +if __name__ == '__main__': + __main__() + + +# FIXME: this ought to be someplace else. 
+
+# FIXME: remove this comment when the buildbot does this.  After this
+# program, run something that amounts to
+#
+# find /tmp/distros -name *.deb -or -name Packages.gz | while read f; do echo "./s3cp.py $f ${f#/tmp/}"; done
+#
+# where ./s3cp.py is a trivial s3 put executable in this directory.
+
+# merge_directories_concatenating_conflicts('/tmp/distros/debian', '/tmp/distros-20100222/debian/HEAD', '/tmp/distros-20100222/debian/r1.3.2','/tmp/distros-20100222/debian/v1.2')
+
+# merge_directories_concatenating_conflicts('/tmp/distros/ubuntu', '/tmp/distros-20100222/ubuntu/HEAD', '/tmp/distros-20100222/ubuntu/r1.3.2', '/tmp/distros-20100222/ubuntu/v1.2')
diff -Nru mongodb-1.4.4/buildscripts/makedist.py mongodb-1.6.3/buildscripts/makedist.py
--- mongodb-1.4.4/buildscripts/makedist.py	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/buildscripts/makedist.py	2010-09-24 10:02:42.000000000 -0700
@@ -1,23 +1,13 @@
 #!/usr/bin/env python
 
-# makedist.py: make a distro package (on an EC2 instance)
+# makedist.py: make a distro package (on an EC2 (or sometimes
+# RackSpace) instance)
 
 # For ease of use, put a file called settings.py someplace in your
 # sys.path, containing something like the following:
 
 # makedist = {
-#    # ec2-api-tools needs the following two set in the process
-#    # environment.
-#    "EC2_HOME": "/path/to/ec2-api-tools",
-#    # The EC2 tools won't run at all unless this variable is set to a directory
-#    # relative to which a "bin/java" exists.
-#    "JAVA_HOME" : "/usr",
-#    # All the ec2-api-tools take these two as arguments.
-#    # Alternatively, you can set the environment variables EC2_PRIVATE_KEY and EC2_CERT
-#    # respectively, leave these two out of settings.py, and let the ec2 tools default.
-#    "ec2_pkey": "/path/to/pk-file.pem"
-#    "ec2_cert" : "/path/to/cert-file.pem"
-#    # This gets supplied to ec2-run-instances to rig up an ssh key for
+#    # This gets supplied to EC2 to rig up an ssh key for
 #    # the remote user.
 #    "ec2_sshkey" : "key-id",
 #    # And so we need to tell our ssh processes where to find the
@@ -54,6 +44,14 @@
 import time
 import os.path
 import tempfile
+import string
+import settings
+
+from libcloud.types import Provider
+from libcloud.providers import get_driver
+from libcloud.drivers.ec2 import EC2NodeDriver, NodeImage
+from libcloud.base import Node, NodeImage, NodeSize, NodeState
+from libcloud.ssh import ParamikoSSHClient
 
 # For the moment, we don't handle any of the errors we raise, so it
 # suffices to have a simple subclass of Exception that just
@@ -141,139 +139,125 @@
      (("centos", "5.4", "x86_64"), "ami-ccb35ea5"),
      (("fedora", "8", "x86_64"), "ami-2547a34c"),
      (("fedora", "8", "x86"), "ami-5647a33f"))),
+    ("rackspace_imgname",
+     ((("fedora", "11", "x86_64"), "Fedora 11"),
+      (("fedora", "12", "x86_64"), "Fedora 12"),
+      (("fedora", "13", "x86_64"), "Fedora 13"))),
     ("ec2_mtype",
      ((("*", "*", "x86"), "m1.small"),
      (("*", "*", "x86_64"), "m1.large"))),
     ]
 
+class nodeWrapper(object):
+    def __init__(self, configurator, **kwargs):
+        self.terminate = False if "no_terminate" in kwargs else True
+        self.use_internal_name = False
+
+    def getHostname(self):
+        internal_name=self.node.private_ip[0]
+        public_name=self.node.public_ip[0]
+        if not (internal_name or public_name):
+            raise Exception('host has no name?')
+        if self.use_internal_name:
+            # FIXME: by inspection, it seems this is sometimes the
+            # empty string.  Dunno if that's EC2 or libcloud being
+            # stupid, but it's not good.
+ if internal_name: + return internal_name + else: + return public_name + else: + return public_name + + def initwait(self): + print "waiting for node to spin up" + # Wait for EC2 to tell us the node is running. + while 1: + n=None + # EC2 sometimes takes a while to report a node. + for i in range(6): + nodes = [n for n in self.list_nodes() if (n.id==self.node.id)] + if len(nodes)>0: + n=nodes[0] + break + else: + time.sleep(10) + if not n: + raise Exception("couldn't find node with id %s" % self.node.id) + if n.state == NodeState.PENDING: + time.sleep(10) + else: + self.node = n + break + print "ok" + # Now wait for the node's sshd to be accepting connections. + print "waiting for ssh" + sshwait = True + if sshwait == False: + return + while sshwait: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + try: + s.connect((self.node.public_ip[0], 22)) + sshwait = False + print "connected on port 22 (ssh)" + time.sleep(15) # arbitrary timeout, in case the + # remote sshd is slow. + except socket.error, err: + pass + finally: + s.close() + time.sleep(3) # arbitrary timeout + print "ok" + + def __enter__(self): + self.start() + # Note: we don't do an initwait() in __enter__ because if an + # exception is raised during __enter__, __exit__ doesn't get + # run (and by inspection RackSpace doesn't let you kill a node + # that hasn't finished booting yet). + return self + + def __exit__(self, type, value, traceback): + self.stop() + + def stop(self): + if self.terminate: + print "Destroying node %s" % self.node.id + self.node.destroy() + else: + print "Not terminating EC2 instance %s." % self.node.id + + def setup(self): + pass -class EC2Instance (object): +class EC2Instance (nodeWrapper): def __init__(self, configurator, **kwargs): + super(EC2Instance, self).__init__(configurator, **kwargs) # Stuff we need to start an instance: AMI name, key and cert # files. AMI and mtype default to configuration in this file, # but can be overridden. self.ec2_ami = configurator.findOrDefault(kwargs, "ec2_ami") self.ec2_mtype = configurator.findOrDefault(kwargs, "ec2_mtype") - self.use_internal_name = True if "use_internal_name" in kwargs else False - - # Authentication stuff defaults according to the conventions - # of the ec2-api-tools. - self.ec2_cert=kwargs["ec2_cert"] - self.ec2_pkey=kwargs["ec2_pkey"] self.ec2_sshkey=kwargs["ec2_sshkey"] # FIXME: this needs to be a commandline option self.ec2_groups = ["default", "buildbot-slave", "dist-slave"] - self.terminate = False if "no_terminate" in kwargs else True - def parsedesc (self, hdl): - line1=hdl.readline() - splitline1=line1.split() - (_, reservation, unknown1, groupstr) = splitline1[:4] - groups = groupstr.split(',') - self.ec2_reservation = reservation - self.ec2_unknown1 = unknown1 - self.ec2_groups = groups - # I haven't seen more than 4 data fields in one of these - # descriptions, but what do I know? - if len(splitline1)>4: - print >> sys.stderr, "more than 4 fields in description line 1\n%s\n" % line1 - self.ec2_extras1 = splitline1[4:] - line2=hdl.readline() - splitline2=line2.split() - # The jerks make it tricky to parse line 2: the fields are - # dependent on the instance's state. - (_, instance, ami, status_or_hostname) = splitline2[:4] - self.ec2_instance = instance - if ami != self.ec2_ami: - print >> sys.stderr, "warning: AMI in description isn't AMI we invoked\nwe started %s, but got\n%s", (self.ec2_ami, line2) - # FIXME: are there other non-running statuses? 
- if status_or_hostname in ["pending", "terminated"]: - self.ec2_status = status_or_hostname - self.ec2_running = False - index = 4 - self.ec2_storage = splitline2[index+8] - else: - self.ec2_running = True - index = 6 - self.ec2_status = splitline2[5] - self.ec2_external_hostname = splitline2[3] - self.ec2_internal_hostname = splitline2[4] - self.ec2_external_ipaddr = splitline2[index+8] - self.ec2_internal_ipaddr = splitline2[index+9] - self.ec2_storage = splitline2[index+10] - (sshkey, unknown2, mtype, starttime, zone, unknown3, unknown4, monitoring) = splitline2[index:index+8] - # FIXME: potential disagreement with the supplied sshkey? - self.ec2_sshkey = sshkey - self.ec2_unknown2 = unknown2 - # FIXME: potential disagreement with the supplied mtype? - self.ec2_mtype = mtype - self.ec2_starttime = starttime - self.ec2_zone = zone - self.ec2_unknown3 = unknown3 - self.ec2_unknown4 = unknown4 - self.ec2_monitoring = monitoring def start(self): "Fire up a fresh EC2 instance." - groups = reduce(lambda x, y : x+y, [["-g", i] for i in self.ec2_groups], []) - argv = ["ec2-run-instances", - self.ec2_ami, "-K", self.ec2_pkey, "-C", self.ec2_cert, - "-k", self.ec2_sshkey, "-t", self.ec2_mtype] + groups - self.ec2_running = False - print "running %s" % argv - proc = subprocess.Popen(argv, stdout=subprocess.PIPE) - try: - self.parsedesc(proc.stdout) - if self.ec2_instance == "": - raise SimpleError("instance id is empty") - else: - print "Instance id: %s" % self.ec2_instance - finally: - r = proc.wait() - if r != 0: - raise SimpleError("ec2-run-instances exited %d", r) - - def initwait(self): - # poll the instance description until we get a hostname. - # Note: it seems there can be a time interval after - # ec2-run-instance finishes during which EC2 will tell us that - # the instance ID doesn't exist. This is sort of bad. - state = "pending" - numtries = 0 - giveup = 5 - - while not self.ec2_running: - time.sleep(15) # arbitrary - argv = ["ec2-describe-instances", "-K", self.ec2_pkey, "-C", self.ec2_cert, self.ec2_instance] - proc = subprocess.Popen(argv, stdout=subprocess.PIPE) - try: - self.parsedesc(proc.stdout) - except Exception, e: - r = proc.wait() - if r < giveup: - print sys.stderr, str(e) - continue - else: - raise SimpleError("ec2-describe-instances exited %d", r) - numtries+=1 - - def stop(self): - if self.terminate: - LocalHost.runLocally(["ec2-terminate-instances", "-K", self.ec2_pkey, "-C", self.ec2_cert, self.ec2_instance]) - else: - print "Not terminating EC2 instance %s." 
% self.ec2_instance - - def __enter__(self): - self.start() - return self - - def __exit__(self, type, value, traceback): - self.stop() - - def getHostname(self): - return self.ec2_internal_hostname if self.use_internal_name else self.ec2_external_hostname + EC2 = get_driver(Provider.EC2) + self.driver = EC2NodeDriver(settings.id, settings.key) + image = NodeImage(self.ec2_ami, self.ec2_ami, EC2) + size = NodeSize(self.ec2_mtype, self.ec2_mtype, None, None, None, None, EC2) + self.node = self.driver.create_node(image=image, name=self.ec2_ami, size=size, keyname=self.ec2_sshkey, securitygroup=self.ec2_groups) + print "Created node %s" % self.node.id + + def list_nodes(self): + return self.driver.list_nodes() class SshConnectionConfigurator (BaseConfigurator): def __init__(self, **kwargs): @@ -287,6 +271,7 @@ (("ubuntu", "9.4", "*"), "root"), (("ubuntu", "8.10", "*"), "root"), (("ubuntu", "8.4", "*"), "ubuntu"), + (("fedora", "*", "*"), "root"), (("centos", "*", "*"), "root"))), ] @@ -300,28 +285,7 @@ # Gets set to False when we think we can ssh in. self.sshwait = True - def sshWait(self): - "Poll until somebody's listening on port 22" - - if self.sshwait == False: - return - while self.sshwait: - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - try: - s.connect((self.ssh_host, 22)) - self.sshwait = False - print "connected on port 22 (ssh)" - time.sleep(15) # arbitrary timeout, in case the - # remote sshd is slow. - except socket.error, err: - pass - finally: - s.close() - time.sleep(3) # arbitrary timeout - def initSsh(self): - self.sshWait() ctlpath="/tmp/ec2-ssh-%s-%s-%s" % (self.ssh_host, self.ssh_login, os.getpid()) argv = ["ssh", "-o", "StrictHostKeyChecking no", "-M", "-o", "ControlPath %s" % ctlpath, @@ -349,7 +313,6 @@ self.ssh_host] + argv) def sendFiles(self, files): - self.sshWait() for (localfile, remotefile) in files: LocalHost.runLocally(["scp", "-o", "StrictHostKeyChecking no", "-o", "ControlMaster auto", @@ -360,8 +323,6 @@ ("" if remotefile is None else remotefile) ]) def recvFiles(self, files): - self.sshWait() - print files for (remotefile, localfile) in files: LocalHost.runLocally(["scp", "-o", "StrictHostKeyChecking no", "-o", "ControlMaster auto", @@ -402,7 +363,8 @@ Conflicts: {pkg_name_conflicts}/' debian/control; ) || exit 1 ( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i 's|$(CURDIR)/debian/mongodb/|$(CURDIR)/debian/{pkg_name}{pkg_name_suffix}/|g' debian/rules) || exit 1 ( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i 's|debian/mongodb.manpages|debian/{pkg_name}{pkg_name_suffix}.manpages|g' debian/rules) || exit 1 -( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i '/^Name:/s/.*/Name: {pkg_name}{pkg_name_suffix}/; /^Version:/s/.*/Version: {pkg_version}/;' rpm/mongo.spec ) +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i '/^Name:/s/.*/Name: {pkg_name}{pkg_name_suffix}\\ +Conflicts: {pkg_name_conflicts}/; /^Version:/s/.*/Version: {pkg_version}/; /Requires.*mongo/s/mongo/{pkg_name}{pkg_name_suffix}/;' rpm/mongo.spec ) # Debian systems require some ridiculous workarounds to get an init # script at /etc/init.d/mongodb when the packge name isn't the init # script name. 
Note: dh_installinit --name won't work, because that @@ -412,6 +374,22 @@ ln debian/init.d debian/{pkg_name}{pkg_name_suffix}.mongodb.init && ln debian/mongodb.upstart debian/{pkg_name}{pkg_name_suffix}.mongodb.upstart && sed -i 's/dh_installinit/dh_installinit --name=mongodb/' debian/rules) || exit 1 +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && cat debian/rules) +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && cat rpm/mongo.spec) +""" + + # If we're just packaging up nightlies, do this: + nightly_build_mangle_files=""" +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i '/scons[[:space:]]*$/d; s^scons.*install^mkdir -p debian/{pkg_name}{pkg_name_suffix} \&\& wget http://downloads.mongodb.org/linux/mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz \&\& tar xzvf mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz \&\& find `tar tzf mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz | sed "s|/.*||" | sort -u | head -n1` -mindepth 1 -maxdepth 1 -type d | xargs -n1 -IARG mv -v ARG debian/{pkg_name}{pkg_name_suffix}/usr \&\& (rm debian/{pkg_name}{pkg_name_suffix}/usr/bin/mongosniff || true)^' debian/rules) +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i 's/^BuildRequires:.*//; s/scons.*\ -c//; s/scons.*\ all//; s^scons.*install^(mkdir -p $RPM_BUILD_ROOT/usr ; cd /tmp \&\& curl http://downloads.mongodb.org/linux/mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz > mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz \&\& tar xzvf mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz \&\& find `tar tzf mongodb-linux-{mongo_arch}-{mongo_pub_version}.tgz | sed "s|/.*||" | sort -u | head -n1` -mindepth 1 -maxdepth 1 -type d | xargs -n1 -IARG cp -pRv ARG $RPM_BUILD_ROOT/usr \&\& (rm -r $RPM_BUILD_ROOT/usr/bin/mongosniff $RPM_BUILD_ROOT/usr/lib64/libmongoclient.a $RPM_BUILD_ROOT/usr/lib/libmongoclient.a $RPM_BUILD_ROOT/usr/include/mongo || true))^' rpm/mongo.spec) +# Upstream nightlies no longer contain libmongoclient. 
+( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i '/%package devel/{{N;N;d;}}; /%description devel/{{N;N;N;N;N;d;}}; /%files devel/{{N;N;N;d;}};' rpm/mongo.spec ) +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && cat debian/rules) +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && cat rpm/mongo.spec) +""" +#$RPM_BUILD_ROOT/usr/lib/libmongoclient.a $RPM_BUILD_ROOT/usr/lib64/libmongoclient.a + mangle_files_for_new_deb_xulrunner_commands = """ +( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}" && sed -i 's/xulrunner-dev/xulrunner-1.9.2-dev/g' debian/control ) """ mangle_files_for_ancient_redhat_commands = """ @@ -432,8 +410,10 @@ mkdir -p "{pkg_product_dir}/{distro_version}/10gen/source" ( cd "{pkg_name}{pkg_name_suffix}-{pkg_version}"; debuild ) || exit 1 # Try installing it -dpkg -i *.deb +dpkg -i {pkg_name}{pkg_name_suffix}*.deb ps ax | grep mongo || {{ echo "no running mongo" >/dev/stderr; exit 1; }} +dpkg --remove $(for f in {pkg_name}{pkg_name_suffix}*.deb ; do echo ${{f%%_*}}; done) +dpkg --purge $(for f in {pkg_name}{pkg_name_suffix}*.deb ; do echo ${{f%%_*}}; done) cp {pkg_name}{pkg_name_suffix}*.deb "{pkg_product_dir}/{distro_version}/10gen/binary-{distro_arch}" cp {pkg_name}{pkg_name_suffix}*.dsc "{pkg_product_dir}/{distro_version}/10gen/source" cp {pkg_name}{pkg_name_suffix}*.tar.gz "{pkg_product_dir}/{distro_version}/10gen/source" @@ -445,10 +425,11 @@ yum -y install {pkg_prereq_str} """ rpm_build_commands=""" -for d in BUILD BUILDROOT RPMS SOURCES SPECS SRPMS; do mkdir -p /usr/src/redhat/$d; done -cp -v "{pkg_name}{pkg_name_suffix}-{pkg_version}/rpm/mongo.spec" /usr/src/redhat/SPECS -tar -cpzf /usr/src/redhat/SOURCES/"{pkg_name}{pkg_name_suffix}-{pkg_version}".tar.gz "{pkg_name}{pkg_name_suffix}-{pkg_version}" -rpmbuild -ba /usr/src/redhat/SPECS/mongo.spec +for d in BUILD BUILDROOT RPMS SOURCES SPECS SRPMS; do mkdir -p {rpmbuild_dir}/$d; done +cp -v "{pkg_name}{pkg_name_suffix}-{pkg_version}/rpm/mongo.spec" {rpmbuild_dir}/SPECS/{pkg_name}{pkg_name_suffix}.spec +tar -cpzf {rpmbuild_dir}/SOURCES/"{pkg_name}{pkg_name_suffix}-{pkg_version}".tar.gz "{pkg_name}{pkg_name_suffix}-{pkg_version}" +rpmbuild -ba --target={distro_arch} {rpmbuild_dir}/SPECS/{pkg_name}{pkg_name_suffix}.spec +# FIXME: should install the rpms, check if mongod is running. """ # FIXME: this is clean, but adds 40 minutes or so to the build process. old_rpm_precommands = """ @@ -474,25 +455,28 @@ # On very old Debianoids, libboost--dev will be some old # boost that's not as thready as we want, but which Eliot says - # will work. - very_old_deb_prereqs = ["libboost-thread-dev", "libboost-filesystem-dev", "libboost-program-options-dev", "libboost-date-time-dev", "libboost-dev", "xulrunner1.9-dev"] + # will work; on very new Debianoids, libbost--dev is what we + # want. + unversioned_deb_boost_prereqs = ["libboost-thread-dev", "libboost-filesystem-dev", "libboost-program-options-dev", "libboost-date-time-dev", "libboost-dev"] + # On some in-between Debianoids, libboost--dev is still a + # 1.34, but 1.35 packages are available, so we want those. + versioned_deb_boost_prereqs = ["libboost-thread1.35-dev", "libboost-filesystem1.35-dev", "libboost-program-options1.35-dev", "libboost-date-time1.35-dev", "libboost1.35-dev"] + + unversioned_deb_xulrunner_prereqs = ["xulrunner-dev"] - # On less old (but still old!) Debianoids, libboost--dev is - # still a 1.34, but 1.35 packages are available, so we want those. 
- old_deb_prereqs = ["libboost-thread1.35-dev", "libboost-filesystem1.35-dev", "libboost-program-options1.35-dev", "libboost-date-time1.35-dev", "libboost1.35-dev", "xulrunner-dev"] - - # On newer Debianoids, libbost--dev is some sufficiently new - # thing. - new_deb_prereqs = [ "libboost-thread-dev", "libboost-filesystem-dev", "libboost-program-options-dev", "libboost-date-time-dev", "libboost-dev", "xulrunner-dev" ] + old_versioned_deb_xulrunner_prereqs = ["xulrunner-1.9-dev"] + new_versioned_deb_xulrunner_prereqs = ["xulrunner-1.9.2-dev"] common_deb_prereqs = [ "build-essential", "dpkg-dev", "libreadline-dev", "libpcap-dev", "libpcre3-dev", "git-core", "scons", "debhelper", "devscripts", "git-core" ] centos_preqres = ["js-devel", "readline-devel", "pcre-devel", "gcc-c++", "scons", "rpm-build", "git" ] - fedora_prereqs = ["js-devel", "readline-devel", "pcre-devel", "gcc-c++", "scons", "rpm-build", "git" ] + fedora_prereqs = ["js-devel", "readline-devel", "pcre-devel", "gcc-c++", "scons", "rpm-build", "git", "curl" ] def __init__(self, **kwargs): super(ScriptFileConfigurator, self).__init__(**kwargs) - if kwargs["mongo_version"][0] == 'r': + # FIXME: this method is disabled until we get back around to + # actually building from source. + if None: # kwargs["mongo_version"][0] == 'r': self.get_mongo_commands = """ wget -Otarball.tgz "http://github.com/mongodb/mongo/tarball/{mongo_version}"; tar xzf tarball.tgz @@ -502,7 +486,9 @@ self.get_mongo_commands = """ git clone git://github.com/mongodb/mongo.git """ - if kwargs['mongo_version'][0] == 'v': + # This is disabled for the moment. it's for building the + # tip of some versioned branch. + if None: #kwargs['mongo_version'][0] == 'v': self.get_mongo_commands +=""" ( cd mongo && git archive --prefix="{pkg_name}{pkg_name_suffix}-{pkg_version}/" "`git log origin/{mongo_version} | sed -n '1s/^commit //p;q'`" ) | tar xf - """ @@ -518,81 +504,188 @@ self.configuration += [("pkg_product_dir", ((("ubuntu", "*", "*"), self.deb_productdir), (("debian", "*", "*"), self.deb_productdir), - (("fedora", "*", "*"), self.rpm_productdir), - (("centos", "*", "*"), self.rpm_productdir))), + (("fedora", "*", "*"), "~/rpmbuild/RPMS"), + (("centos", "*", "*"), "/usr/src/redhat/RPMS"))), ("pkg_prereqs", ((("ubuntu", "9.4", "*"), - self.old_deb_prereqs + self.common_deb_prereqs), + self.versioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "9.10", "*"), - self.new_deb_prereqs + self.common_deb_prereqs), + self.unversioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "10.4", "*"), - self.new_deb_prereqs + self.common_deb_prereqs), + self.unversioned_deb_boost_prereqs + self.new_versioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "8.10", "*"), - self.old_deb_prereqs + self.common_deb_prereqs), + self.versioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "8.4", "*"), - self.very_old_deb_prereqs + self.common_deb_prereqs), + self.unversioned_deb_boost_prereqs + self.old_versioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("debian", "5.0", "*"), - self.old_deb_prereqs + self.common_deb_prereqs), - (("fedora", "8", "*"), + self.versioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), + (("fedora", "*", "*"), self.fedora_prereqs), (("centos", "5.4", "*"), self.centos_preqres))), + # FIXME: this is deprecated ("commands", ((("debian", "*", "*"), 
- self.preamble_commands + self.deb_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.deb_build_commands), - (("ubuntu", "*", "*"), + self.deb_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.deb_build_commands), + (("ubuntu", "10.4", "*"), + self.preamble_commands + self.deb_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.mangle_files_for_new_deb_xulrunner_commands + self.deb_build_commands), + (("ubuntu", "*", "*"), self.preamble_commands + self.deb_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.deb_build_commands), (("centos", "*", "*"), self.preamble_commands + self.old_rpm_precommands + self.rpm_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.mangle_files_for_ancient_redhat_commands + self.rpm_build_commands), (("fedora", "*", "*"), self.preamble_commands + self.old_rpm_precommands + self.rpm_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.rpm_build_commands))), + ("preamble_commands", + ((("*", "*", "*"), self.preamble_commands), + )), + ("install_prereqs", + ((("debian", "*", "*"), self.deb_prereq_commands), + (("ubuntu", "*", "*"), self.deb_prereq_commands), + (("centos", "*", "*"), self.rpm_prereq_commands), + (("fedora", "*", "*"), self.rpm_prereq_commands))), + ("get_mongo", + ((("*", "*", "*"), self.get_mongo_commands), + )), + ("mangle_mongo", + ((("debian", "*", "*"), self.mangle_files_commands), + (("ubuntu", "10.4", "*"), + self.mangle_files_commands + self.mangle_files_for_new_deb_xulrunner_commands), + (("ubuntu", "*", "*"), self.mangle_files_commands), + (("centos", "*", "*"), + self.mangle_files_commands + self.mangle_files_for_ancient_redhat_commands), + (("fedora", "*", "*"), + self.mangle_files_commands))), + ("build_prerequisites", + ((("fedora", "*", "*"), self.old_rpm_precommands), + (("centos", "*", "*"), self.old_rpm_precommands), + (("*", "*", "*"), ''))), + ("install_for_packaging", + ((("debian", "*", "*"),""), + (("ubuntu", "*", "*"),""), + (("fedora", "*", "*"), ""), + (("centos", "*", "*"),""))), + ("build_package", + ((("debian", "*", "*"), + self.deb_build_commands), + (("ubuntu", "*", "*"), + self.deb_build_commands), + (("fedora", "*", "*"), + self.rpm_build_commands), + (("centos", "*", "*"), + self.rpm_build_commands))), ("pkg_name", ((("debian", "*", "*"), "mongodb"), (("ubuntu", "*", "*"), "mongodb"), (("centos", "*", "*"), "mongo"), - - (("fedora", "*", "*"), "mongo") - )), + (("fedora", "*", "*"), "mongo"))), + # FIXME: there should be a command-line argument for this. 
("pkg_name_conflicts", - ((("*", "*", "*"), ["", "-stable", "-unstable", "-snapshot"]), - )) - ] + ((("*", "*", "*"), ["", "-stable", "-unstable", "-snapshot", "-oldstable"]), + )), + ("rpmbuild_dir", + ((("fedora", "*", "*"), "~/rpmbuild"), + (("centos", "*", "*"), "/usr/src/redhat"), + (("*", "*","*"), ''), + )), + ] class ScriptFile(object): def __init__(self, configurator, **kwargs): - self.mongo_version = kwargs["mongo_version"] - self.pkg_version = kwargs["pkg_version"] - self.pkg_name_suffix = kwargs["pkg_name_suffix"] if "pkg_name_suffix" in kwargs else "" + self.configurator = configurator + self.mongo_version_spec = kwargs['mongo_version_spec'] + self.mongo_arch = kwargs["arch"] if kwargs["arch"] == "x86_64" else "i686" self.pkg_prereqs = configurator.default("pkg_prereqs") self.pkg_name = configurator.default("pkg_name") self.pkg_product_dir = configurator.default("pkg_product_dir") - self.pkg_name_conflicts = configurator.default("pkg_name_conflicts") if self.pkg_name_suffix else [] - self.pkg_name_conflicts.remove(self.pkg_name_suffix) if self.pkg_name_suffix and self.pkg_name_suffix in self.pkg_name_conflicts else [] - self.formatter = configurator.default("commands") + #self.formatter = configurator.default("commands") self.distro_name = configurator.default("distro_name") self.distro_version = configurator.default("distro_version") self.distro_arch = configurator.default("distro_arch") + def bogoformat(self, fmt, **kwargs): + r = '' + i = 0 + while True: + c = fmt[i] + if c in '{}': + i+=1 + c2=fmt[i] + if c2 == c: + r+=c + else: + j=i + while True: + p=fmt[j:].find('}') + if p == -1: + raise Exception("malformed format string starting at %d: no closing brace" % i) + else: + j+=p + if len(fmt) > (j+1) and fmt[j+1]=='}': + j+=2 + else: + break + key = fmt[i:j] + r+=kwargs[key] + i=j + else: + r+=c + i+=1 + if i==len(fmt): + return r + + def fmt(self, formatter, **kwargs): + try: + return string.Formatter.format(formatter, kwargs) + finally: + return self.bogoformat(formatter, **kwargs) + def genscript(self): - return self.formatter.format(mongo_version=self.mongo_version, - distro_name=self.distro_name, - distro_version=self.distro_version, - distro_arch=self.distro_arch, - pkg_prereq_str=" ".join(self.pkg_prereqs), - pkg_name=self.pkg_name, - pkg_name_suffix=self.pkg_name_suffix, - pkg_version=self.pkg_version, - pkg_product_dir=self.pkg_product_dir, - # KLUDGE: rpm specs and deb - # control files use - # comma-separated conflicts, - # but there's no reason to - # suppose this works elsewhere - pkg_name_conflicts = ", ".join([self.pkg_name+conflict for conflict in self.pkg_name_conflicts]) - ) + script='' + formatter = self.configurator.default("preamble_commands") + self.configurator.default("install_prereqs") + script+=self.fmt(formatter, + distro_name=self.distro_name, + distro_version=self.distro_version, + distro_arch=self.distro_arch, + pkg_name=self.pkg_name, + pkg_product_dir=self.pkg_product_dir, + mongo_arch=self.mongo_arch, + pkg_prereq_str=" ".join(self.pkg_prereqs), + ) + + specs=self.mongo_version_spec.split(',') + for spec in specs: + (version, pkg_name_suffix, pkg_version) = parse_mongo_version_spec(spec) + mongo_version = version if version[0] != 'n' else ('HEAD' if version == 'nlatest' else 'r'+version[1:]) #'HEAD' + mongo_pub_version = version.lstrip('n') if version[0] in 'n' else 'latest' + pkg_name_suffix = pkg_name_suffix if pkg_name_suffix else '' + pkg_version = pkg_version + pkg_name_conflicts = list(self.configurator.default("pkg_name_conflicts") 
if pkg_name_suffix else [])
+            pkg_name_conflicts.remove(pkg_name_suffix) if pkg_name_suffix and pkg_name_suffix in pkg_name_conflicts else []
+            formatter = self.configurator.default("get_mongo") + self.configurator.default("mangle_mongo") + (self.configurator.nightly_build_mangle_files if version[0] == 'n' else '') + (self.configurator.default("build_prerequisites") if version[0] != 'n' else '') + self.configurator.default("install_for_packaging") + self.configurator.default("build_package")
+            script+=self.fmt(formatter,
+                             mongo_version=mongo_version,
+                             distro_name=self.distro_name,
+                             distro_version=self.distro_version,
+                             distro_arch=self.distro_arch,
+                             pkg_prereq_str=" ".join(self.pkg_prereqs),
+                             pkg_name=self.pkg_name,
+                             pkg_name_suffix=pkg_name_suffix,
+                             pkg_version=pkg_version,
+                             pkg_product_dir=self.pkg_product_dir,
+                             # KLUDGE: rpm specs and deb
+                             # control files use
+                             # comma-separated conflicts,
+                             # but there's no reason to
+                             # suppose this works elsewhere
+                             pkg_name_conflicts = ", ".join([self.pkg_name+conflict for conflict in pkg_name_conflicts]),
+                             mongo_arch=self.mongo_arch,
+                             mongo_pub_version=mongo_pub_version,
+                             rpmbuild_dir=self.configurator.default('rpmbuild_dir'))
+        script+='rm -rf mongo'
+        return script
 
     def __enter__(self):
         self.localscript=None
@@ -614,6 +707,69 @@
 class Configurator(SshConnectionConfigurator, EC2InstanceConfigurator, ScriptFileConfigurator, BaseHostConfigurator):
     def __init__(self, **kwargs):
         super(Configurator, self).__init__(**kwargs)
+
+class rackspaceInstance(nodeWrapper):
+    def __init__(self, configurator, **kwargs):
+        super(rackspaceInstance, self).__init__(configurator, **kwargs)
+        self.imgname=configurator.default('rackspace_imgname')
+
+    def start(self):
+        driver = get_driver(Provider.RACKSPACE)
+        self.conn = driver(settings.rackspace_account, settings.rackspace_api_key)
+        name=self.imgname+'-'+str(os.getpid())
+        images=filter(lambda x: (x.name.find(self.imgname) > -1), self.conn.list_images())
+        sizes=self.conn.list_sizes()
+        sizes.sort(cmp=lambda x,y: int(x.ram)<int(y.ram))
+        if len(images) > 1:
+            raise Exception("too many images with \"%s\" in the name" % self.imgname)
+        if len(images) < 1:
+            raise Exception("too few images with \"%s\" in the name" % self.imgname)
+        image = images[0]
+        self.node = self.conn.create_node(image=image, name=name, size=sizes[0])
+        # Note: the password is available only in the response to the
+        # create_node request, not in subsequent list_nodes()
+        # requests; so although the node objects we get back from
+        # list_nodes() are usable for most things, we must hold onto
+        # the initial password.
+        self.password = self.node.extra['password']
+        print self.node
+
+    def list_nodes(self):
+        return self.conn.list_nodes()
+
+    def setup(self):
+        self.putSshKey()
+
+    def putSshKey(self):
+        keyfile=settings.makedist['ssh_keyfile']
+        ssh = ParamikoSSHClient(hostname = self.node.public_ip[0], password = self.password)
+        ssh.connect()
+        print "putting ssh public key"
+        ssh.put(".ssh/authorized_keys", contents=open(keyfile+'.pub').read(), chmod=0600)
+        print "ok"
+
+def parse_mongo_version_spec (spec):
+    foo = spec.split(":")
+    mongo_version = foo[0] # this can be a commit id, a
+                           # release id "r1.2.2", or a branch name
+                           # starting with v.
+ if len(foo) > 1: + pkg_name_suffix = foo[1] + if len(foo) > 2 and foo[2]: + pkg_version = foo[2] + else: + pkg_version = time.strftime("%Y%m%d") + if not pkg_name_suffix: + if mongo_version[0] in ["r", "v"]: + nums = mongo_version.split(".") + if int(nums[1]) % 2 == 0: + pkg_name_suffix = "-stable" + else: + pkg_name_suffix = "-unstable" + else: + pkg_name_suffix = "" + return (mongo_version, pkg_name_suffix, pkg_version) def main(): # checkEnvironment() @@ -629,59 +785,28 @@ try: import settings if "makedist" in dir ( settings ): - for key in ["EC2_HOME", "JAVA_HOME"]: - if key in settings.makedist: - os.environ[key] = settings.makedist[key] - for key in ["ec2_pkey", "ec2_cert", "ec2_sshkey", "ssh_keyfile" ]: + for key in ["ec2_sshkey", "ssh_keyfile", "gpg_homedir" ]: if key not in kwargs and key in settings.makedist: kwargs[key] = settings.makedist[key] except Exception, err: print "No settings: %s. Continuing anyway..." % err pass - # Ensure that PATH contains $EC2_HOME/bin - vars = ["EC2_HOME", "JAVA_HOME"] - for var in vars: - if os.getenv(var) == None: - raise SimpleError("Environment variable %s is unset; did you create a settings.py?", var) - - if len([True for x in os.environ["PATH"].split(":") if x.find(os.environ["EC2_HOME"]) > -1]) == 0: - os.environ["PATH"]=os.environ["EC2_HOME"]+"/bin:"+os.environ["PATH"] - - kwargs["distro_name"] = distro_name kwargs["distro_version"] = distro_version kwargs["arch"] = arch - - foo = mongo_version_spec.split(":") - kwargs["mongo_version"] = foo[0] # this can be a commit id, a - # release id "r1.2.2", or a - # branch name starting with v. - if len(foo) > 1: - kwargs["pkg_name_suffix"] = foo[1] - if len(foo) > 2 and foo[2]: - kwargs["pkg_version"] = foo[2] - else: - kwargs["pkg_version"] = time.strftime("%Y%m%d") - + kwargs['mongo_version_spec'] = mongo_version_spec + + kwargs["localdir"] = rootdir # FIXME: this should also include the mongo version or something. - if "subdirs" in kwargs: - kwargs["localdir"] = "%s/%s/%s/%s" % (rootdir, distro_name, distro_version, arch, kwargs["mongo_version"]) - else: - kwargs["localdir"] = rootdir +# if "subdirs" in kwargs: +# kwargs["localdir"] = "%s/%s/%s/%s/%s" % (rootdir, distro_name, distro_version, arch, kwargs["mongo_version"]) +# else: + - if "pkg_name_suffix" not in kwargs: - if kwargs["mongo_version"][0] in ["r", "v"]: - nums = kwargs["mongo_version"].split(".") - if int(nums[1]) % 2 == 0: - kwargs["pkg_name_suffix"] = "-stable" - else: - kwargs["pkg_name_suffix"] = "-unstable" - else: - kwargs["pkg_name_suffix"] = "" - kwargs['local_gpg_dir'] = kwargs["local_gpg_dir"] if "local_gpg_dir" in kwargs else os.path.expanduser("~/.gnupg") + kwargs['gpg_homedir'] = kwargs["gpg_homedir"] if "gpg_homedir" in kwargs else os.path.expanduser("~/.gnupg") configurator = Configurator(**kwargs) LocalHost.runLocally(["mkdir", "-p", kwargs["localdir"]]) with ScriptFile(configurator, **kwargs) as script: @@ -689,15 +814,18 @@ print """# Going to run the following on a fresh AMI:""" print f.read() time.sleep(10) - with EC2Instance(configurator, **kwargs) as ec2: - ec2.initwait() - kwargs["ssh_host"] = ec2.getHostname() + # FIXME: it's not the best to have two different pathways for + # the different hosting services, but... 
+ with EC2Instance(configurator, **kwargs) if kwargs['distro_name'] != 'fedora' else rackspaceInstance(configurator, **kwargs) as host: + host.initwait() + host.setup() + kwargs["ssh_host"] = host.getHostname() with SshConnection(configurator, **kwargs) as ssh: ssh.runRemotely(["uname -a; ls /"]) ssh.runRemotely(["mkdir", "pkg"]) if "local_mongo_dir" in kwargs: ssh.sendFiles([(kwargs["local_mongo_dir"]+'/'+d, "pkg") for d in ["rpm", "debian"]]) - ssh.sendFiles([(kwargs['local_gpg_dir'], ".gnupg")]) + ssh.sendFiles([(kwargs['gpg_homedir'], ".gnupg")]) ssh.sendFiles([(script.localscript, "makedist.sh")]) ssh.runRemotely((["sudo"] if ssh.ssh_login != "root" else [])+ ["sh", "makedist.sh"]) ssh.recvFiles([(script.pkg_product_dir, kwargs['localdir'])]) @@ -709,7 +837,7 @@ ("N", "no-terminate", False, "Leave the EC2 instance running at the end of the job", None), ("S", "subdirs", False, "Create subdirectories of the output directory based on distro name, version, and architecture", None), ("I", "use-internal-name", False, "Use the EC2 internal hostname for sshing", None), - (None, "local-gpg-dir", True, "Local directory of gpg junk", "STRING"), + (None, "gpg-homedir", True, "Local directory of gpg junk", "STRING"), (None, "local-mongo-dir", True, "Copy packaging files from local mongo checkout", "DIRECTORY"), ] shortopts = "".join([t[0] + (":" if t[2] else "") for t in flagspec if t[0] is not None]) @@ -746,11 +874,12 @@ MONGO-VERSION-SPEC has the syntax Commit(:Pkg-Name-Suffix(:Pkg-Version)). If Commit starts with an 'r', -build from a tagged release; if Commit starts with a 'v', build from -the HEAD of a version branch; otherwise, build whatever git commit is -identified by Commit. Pkg-Name-Suffix gets appended to the package -name, and defaults to "-stable" and "-unstable" if Commit looks like -it designates a stable or unstable release/branch, respectively. +build from a tagged release; if Commit starts with an 'n', package up +a nightly build; if Commit starts with a 'v', build from the HEAD of a +version branch; otherwise, build whatever git commit is identified by +Commit. Pkg-Name-Suffix gets appended to the package name, and +defaults to "-stable" and "-unstable" if Commit looks like it +designates a stable or unstable release/branch, respectively. Pkg-Version is used as the package version, and defaults to YYYYMMDD. Examples: @@ -779,8 +908,7 @@ print "%-20s\t%s." % ("%4s--%s%s:" % ("-%s, " % t[0] if t[0] else "", t[1], ("="+t[4]) if t[4] else ""), t[3]) print """ Mandatory arguments to long options are also mandatory for short -options. 
Some EC2 arguments default to (and override) environment -variables; see the ec2-api-tools documentation.""" +options.""" sys.exit(0) if "usage" in kwargs: @@ -796,4 +924,5 @@ # Examples: -# ./makedist.py --local-gpg-dir=$HOME/10gen/dst/dist-gnupg /tmp/ubuntu ubuntu 8.10 x86_64 HEAD:-snapshot +# ./makedist.py /tmp/ubuntu ubuntu 8.10 x86_64 HEAD:-snapshot,v1.4:-stable,v1.5:-unstable +# ./makedist.py /tmp/ubuntu ubuntu 8.10 x86_64 nlatest:-snapshot,n1.4.2:-stable,n1.5.0:-unstable diff -Nru mongodb-1.4.4/buildscripts/mergerepositories.py mongodb-1.6.3/buildscripts/mergerepositories.py --- mongodb-1.4.4/buildscripts/mergerepositories.py 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/mergerepositories.py 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,194 @@ +#!/usr/bin/python + +from __future__ import with_statement +from libcloud.types import Provider +from libcloud.providers import get_driver +from libcloud.drivers.ec2 import EC2NodeDriver, NodeImage +from libcloud.base import Node, NodeImage, NodeSize, NodeState + +# libcloud's SSH client seems to be one of those pointless wrappers +# that (at the moment) both doesn't add anything to the thing it wraps +# (Paramiko) and also fails to expose the underlying thing's features. +# What's wrong with people? +#from libcloud.ssh import SSHClient + +import time +import sys +import settings +import subprocess +import os +import socket + +EC2 = get_driver(Provider.EC2) +EC2Driver=EC2NodeDriver(settings.id, settings.key) + +def tryEC2(): + + image=NodeImage('ami-bf07ead6', 'ubuntu 10.4', EC2) + size=NodeSize('m1.large', 'large', None, None, None, None, EC2) + + node = None + try: + node = EC2Driver.create_node(image=image, name="ubuntu-test", size=size, keyname="kp1", securitygroup=['default', 'dist-slave', 'buildbot-slave']) + print node + print node.id + while node.state == NodeState.PENDING: + time.sleep(3) + finally: + if node: + node.destroy() + + +class node(object): + def initWait(self): + while 1: + n=None + # EC2 sometimes takes a while to report a node. + for i in range(6): + nodes = [n for n in self.list_nodes() if (n.id==self.node.id)] + if len(nodes)>0: + n=nodes[0] + break + else: + time.sleep(10) + if not n: + raise Exception("couldn't find node with id %s" % self.node.id) + if n.state == NodeState.PENDING: + time.sleep(10) + else: + self.node = n + break + print "ok" + # Now wait for the node's sshd to be accepting connections. + print "waiting for ssh" + sshwait = True + if sshwait == False: + return + while sshwait: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + try: + s.connect((self.node.public_ip[0], 22)) + sshwait = False + print "connected on port 22 (ssh)" + time.sleep(15) # arbitrary timeout, in case the + # remote sshd is slow. + except socket.error, err: + pass + finally: + s.close() + time.sleep(3) # arbitrary timeout + print "ok" + + def __enter__(self): + return self + + def __exit__(self, arg0, arg1, arg2): + print "shutting down node %s" % self.node + self.node.destroy() + +# I don't think libcloud's Nodes implement __enter__ and __exit__, and +# I like the with statement for ensuring that we don't leak nodes when +# we don't have to. 
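The comment above is the reason every node class here grows __enter__ and __exit__: a with block guarantees destroy() runs even when a build step throws, so billable instances don't leak. The same guarantee, sketched generically with contextlib for a libcloud-style node (names illustrative):

    from contextlib import contextmanager

    @contextmanager
    def ephemeral(node):
        try:
            yield node          # hand the node to the with-block body
        finally:
            print "shutting down node %s" % node
            node.destroy()      # runs on normal exit and on exceptions alike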
+class ec2node(node): + def list_nodes(self): + return EC2Driver.list_nodes() + +class ubuntuNode(ec2node): + def __init__(self): + image=NodeImage('ami-bf07ead6', 'ubuntu 10.4', EC2) + size=NodeSize('m1.large', 'large', None, None, None, None, EC2) + + self.node = EC2Driver.create_node(image=image, name="ubuntu-test", size=size, securitygroup=['default', 'dist-slave', 'buildbot-slave'], keyname='kp1') + +class centosNode(ec2node): + def __init__(self): + image=NodeImage('ami-ccb35ea5', 'ubuntu 10.4', EC2) + size=NodeSize('m1.large', 'large', None, None, None, None, EC2) + + self.node = EC2Driver.create_node(image=image, name="ubuntu-test", size=size, securitygroup=['default', 'dist-slave', 'buildbot-slave'], keyname='kp1') + +class rackspaceNode(node): + def list_nodes(self): + self.conn.list_nodes() + +class fedora11Node(rackspaceNode): + def __init__(self): + driver = get_driver(Provider.RACKSPACE) + self.conn = driver(settings.rackspace_account, settings.rackspace_api_key) + string='Fedora 11' + images=filter(lambda x: (x.name.find(string) > -1), self.conn.list_images()) + sizes=self.conn.list_sizes() + sizes.sort(cmp=lambda x,y: int(x.ram) $d/Packages; gzip -9c $d/Packages > $d/Packages.gz; done) ; done""" + makereleaseprologue="""Origin: 10gen +Label: 10gen +Suite: 10gen +Codename: VVVVVV +Version: VVVVVV +Architectures: i386 amd64 +Components: 10gen +Description: 10gen packages""" + makeaptrelease="""find . -maxdepth 3 -mindepth 3 | while read d; do ( cd $d && (echo '%s' | sed s/VVVVVV/$(basename $(pwd))/; apt-ftparchive release .) > /tmp/Release && mv /tmp/Release . && gpg -r `gpg --list-keys | grep uid | awk '{print $(NF)}'` --no-secmem-warning --no-tty -abs --output Release.gpg Release ); done""" % makereleaseprologue + with ubuntuNode() as ubuntu: + ubuntu.initWait() + print ubuntu.node + run_for_effect(["ssh", "-o", "StrictHostKeyChecking no","-i", keyfile, "ubuntu@"+ubuntu.node.public_ip[0], "sudo", "sh", "-c", "\"export DEBIAN_FRONTEND=noninteractive; apt-get update; apt-get -y install debhelper\""]) + run_for_effect(["scp", "-o", "StrictHostKeyChecking no","-i", keyfile, "-r", dir, "ubuntu@"+ubuntu.node.public_ip[0]+":"]) + run_for_effect(["scp", "-o", "StrictHostKeyChecking no","-i", keyfile, "-r", gpgdir, "ubuntu@"+ubuntu.node.public_ip[0]+":.gnupg"]) + run_for_effect(["ssh", "-o", "StrictHostKeyChecking no","-i", keyfile, "ubuntu@"+ubuntu.node.public_ip[0], "sh", "-c", "\"ls -lR ./" + dirtail + "\""]) + run_for_effect(["ssh", "-o", "StrictHostKeyChecking no","-i", keyfile, "ubuntu@"+ubuntu.node.public_ip[0], "cd ./"+dirtail + " && " + makeaptrepo]) + run_for_effect(["ssh", "-o", "StrictHostKeyChecking no","-i", keyfile, "ubuntu@"+ubuntu.node.public_ip[0], "cd ./"+dirtail + " && " + makeaptrelease]) + run_for_effect(["scp", "-o", "StrictHostKeyChecking no", "-i", keyfile, "-r", "ubuntu@"+ubuntu.node.public_ip[0]+":./"+dirtail +'/*', outdir]) + + +def run_for_effect(argv): + print " ".join(argv) + r=subprocess.Popen(argv).wait() + if r!=0: + raise Err("subprocess %s exited %d" % (argv, r)) + +if __name__ == "__main__": + (flavor, dir, outdir) = sys.argv[-3:] + + if flavor == "deb": + merge_apt_repo(dir, outdir) + elif flavor == "rpm": + merge_yum_repo(dir, outdir) + else: + Err("unknown pkg flavor %s" % flavor) + # TODO: yum repositories + + + #main() + #tryRackSpace() diff -Nru mongodb-1.4.4/buildscripts/smoke.py mongodb-1.6.3/buildscripts/smoke.py --- mongodb-1.4.4/buildscripts/smoke.py 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/buildscripts/smoke.py 2010-09-24 
10:02:42.000000000 -0700 @@ -0,0 +1,522 @@ +#!/usr/bin/python + +# smoke.py: run some mongo tests. + +# Bugs, TODOs: + +# 0 Some tests hard-code pathnames relative to the mongo repository, +# so the smoke.py process and all its children must be run with the +# mongo repo as current working directory. That's kinda icky. + +# 1 The tests that are implemented as standalone executables ("test", +# "perftest"), don't take arguments for the dbpath, but +# unconditionally use "/tmp/unittest". + +# 2 mongod output gets intermingled with mongo output, and it's often +# hard to find error messages in the slop. Maybe have smoke.py do +# some fancier wrangling of child process output? + +# 3 Some test suites run their own mongods, and so don't need us to +# run any mongods around their execution. (It's harmless to do so, +# but adds noise in the output.) + +# 4 Running a separate mongo shell for each js file is slower than +# loading js files into one mongo shell process. Maybe have runTest +# queue up all filenames ending in ".js" and run them in one mongo +# shell at the "end" of testing? + +# 5 Right now small-oplog implies master/slave replication. Maybe +# running with replication should be an orthogonal concern. (And +# maybe test replica set replication, too.) + +# 6 We use cleanbb.py to clear out the dbpath, but cleanbb.py kills +# off all mongods on a box, which means you can't run two smoke.py +# jobs on the same host at once. So something's gotta change. + +from __future__ import with_statement +from subprocess import Popen, PIPE, call +import os +import sys +import utils +import time +import socket +from optparse import OptionParser +import atexit +import glob +import shutil +import re +import parser + +mongoRepo = os.getcwd() #'./' +testPath = None + +mongodExecutable = "./mongod" +mongodPort = "32000" +shellExecutable = "./mongo" +continueOnFailure = False +oneMongodPerTest = False + +tests = [] +winners = [] +losers = {} + +# Finally, atexit functions seem to be a little oblivious to whether +# Python is exiting because of an error, so we'll use this to +# communicate with the report() function. +exit_bad = True + +# For replication hash checking +replicated_dbs = [] +lost_in_slave = [] +lost_in_master = [] +screwy_in_slave = {} + +smokeDbPrefix = '' +smallOplog = False + +# This class just implements the with statement API, for a sneaky +# purpose below. 
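That sneaky purpose, in brief: a with statement needs a context manager in both arms of a conditional expression, so a do-nothing stand-in lets the caller decide at runtime whether a real mongod gets started. A minimal illustration (editor's sketch; runTests further down is the real call site):

    want_server = False
    # nothing() satisfies the context-manager protocol but manages nothing
    with mongod(smallOplog=False) if want_server else nothing():
        pass    # body runs either way; a mongod starts only when asked for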
+class nothing(object): + def __enter__(self): + return self + def __exit__(self, type, value, traceback): + return not isinstance(value, Exception) + +class mongod(object): + def __init__(self, **kwargs): + self.kwargs = kwargs + self.proc = None + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + try: + self.stop() + except Exception, e: + print >> sys.stderr, "error shutting down mongod" + print >> sys.stderr, e + return not isinstance(value, Exception) + + def ensureTestDirs(self): + utils.ensureDir( smokeDbPrefix + "/tmp/unittest/" ) + utils.ensureDir( smokeDbPrefix + "/data/" ) + utils.ensureDir( smokeDbPrefix + "/data/db/" ) + + def checkMongoPort( self, port=27017 ): + sock = socket.socket() + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + sock.settimeout(1) + sock.connect(("localhost", int(port))) + sock.close() + + def didMongodStart( self, port=mongodPort, timeout=20 ): + while timeout > 0: + time.sleep( 1 ) + try: + self.checkMongoPort( int(port) ) + return True + except Exception,e: + print >> sys.stderr, e + timeout = timeout - 1 + return False + + def start(self): + global mongodPort + global mongod + if self.proc: + print >> sys.stderr, "probable bug: self.proc already set in start()" + return + self.ensureTestDirs() + dirName = smokeDbPrefix + "/data/db/sconsTests/" + self.port = int(mongodPort) + self.slave = False + if 'slave' in self.kwargs: + dirName = smokeDbPrefix + '/data/db/sconsTestsSlave/' + srcport = mongodPort + self.port += 1 + self.slave = True + if os.path.exists ( dirName ): + if 'slave' in self.kwargs: + argv = ["python", "buildscripts/cleanbb.py", '--nokill', dirName] + + else: + argv = ["python", "buildscripts/cleanbb.py", dirName] + call( argv ) + utils.ensureDir( dirName ) + argv = [mongodExecutable, "--port", str(self.port), "--dbpath", dirName] + if self.kwargs.get('smallOplog'): + argv += ["--master", "--oplogSize", "10"] + if self.slave: + argv += ['--slave', '--source', 'localhost:'+str(srcport)] + print "running " + " ".join(argv) + self.proc = Popen(argv) + if not self.didMongodStart( self.port ): + raise Exception( "Failed to start mongod" ) + + if self.slave: + while True: + argv = [shellExecutable, "--port", str(self.port), "--quiet", "--eval", 'db.printSlaveReplicationInfo()'] + res = Popen(argv, stdout=PIPE).communicate()[0] + if res.find('initial sync') < 0: + break + + + + def stop(self): + if not self.proc: + print >> sys.stderr, "probable bug: self.proc unset in stop()" + return + try: + # This function not available in Python 2.5 + self.proc.terminate() + except AttributeError: + if os.sys.platform == "win32": + import win32process + win32process.TerminateProcess(self.proc._handle, -1) + else: + from os import kill + kill( self.proc.pid, 15 ) + self.proc.wait() + sys.stderr.flush() + sys.stdout.flush() + +class Bug(Exception): + def __str__(self): + return 'bug in smoke.py: ' + super(Bug, self).__str__() + +class TestFailure(Exception): + pass + +class TestExitFailure(TestFailure): + def __init__(self, *args): + self.path = args[0] + self.status=args[1] + def __str__(self): + return "test %s exited with status %d" % (self.path, self.status) + +class TestServerFailure(TestFailure): + def __init__(self, *args): + self.path = args[0] + self.status = -1 # this is meaningless as an exit code, but + # that's the point. 
+ def __str__(self): + return 'mongod not running after executing test %s' % self.path + +def checkDbHashes(master, slave): + # Need to pause a bit so a slave might catch up... + if not slave.slave: + raise(Bug("slave instance doesn't have slave attribute set")) + + print "waiting for slave to catch up..." + ARB=10 # ARBITRARY + time.sleep(ARB) + while True: + # FIXME: it's probably better to do an empty insert and a + # getLastError() to force a sync. + argv = [shellExecutable, "--port", str(slave.port), "--quiet", "--eval", 'db.printSlaveReplicationInfo()'] + res = Popen(argv, stdout=PIPE).communicate()[0] + m = re.search('(\d+)secs ', res) + if int(m.group(1)) > ARB: #res.find('initial sync') < 0: + break + time.sleep(3) + + # FIXME: maybe make this run dbhash on all databases? + for mongod in [master, slave]: + argv = [shellExecutable, "--port", str(mongod.port), "--quiet", "--eval", "x=db.runCommand('dbhash'); printjson(x.collections)"] + hashstr = Popen(argv, stdout=PIPE).communicate()[0] + # WARNING FIXME KLUDGE et al.: this is sleazy and unsafe. + mongod.dict = eval(hashstr) + + global lost_in_slave, lost_in_master, screwy_in_slave, replicated_dbs + + for db in replicated_dbs: + if db not in slave.dict: + lost_in_slave.append(db) + mhash = master.dict[db] + shash = slave.dict[db] + if mhash != shash: + screwy_in_slave[db] = mhash + "/" + shash + for db in slave.dict.keys(): + if db not in master.dict: + lost_in_master.append(db) + replicated_dbs += master.dict.keys() + +# Blech. +def skipTest(path): + if smallOplog: + if os.path.basename(path) in ["cursor8.js", "indexh.js"]: + return True + return False + +def runTest(test): + (path, usedb) = test + (ignore, ext) = os.path.splitext(path) + if skipTest(path): + print "skipping " + path + return + if ext == ".js": + argv=[shellExecutable, "--port", mongodPort] + if not usedb: + argv += ["--nodb"] + if smallOplog: + argv += ["--eval", 'testingReplication = true;'] + argv += [path] + elif ext in ["", ".exe"]: + # Blech. + if os.path.basename(path) in ["test", "test.exe", "perftest", "perftest.exe"]: + argv=[path] + # more blech + elif os.path.basename(path) == 'mongos': + argv=[path, "--test"] + else: + argv=[testPath and os.path.abspath(os.path.join(testPath, path)) or path, + "--port", mongodPort] + else: + raise Bug("fell off in extension case: %s" % path) + print " *******************************************" + print " Test : " + os.path.basename(path) + " ..." + t1=time.time() + # FIXME: we don't handle the case where the subprocess + # hangs... that's bad. + r = call(argv, cwd=testPath) + t2=time.time() + print " " + str((t2-t1)*1000) + "ms" + if r != 0: + raise TestExitFailure(path, r) + if Popen( [ mongodExecutable, "msg", "ping", mongodPort ], stdout=PIPE ).communicate()[0].count( "****ok" ) == 0: + raise TestServerFailure(path) + if call( [ mongodExecutable, "msg", "ping", mongodPort ] ) != 0: + raise TestServerFailure(path) + print "" +
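For a concrete sense of what runTest assembles, here is roughly the command line it builds for a hypothetical entry (editor's sketch; the file name is illustrative, not from the diff):

    # runTest(("jstests/basic1.js", True)) with the defaults above runs:
    argv = ["./mongo", "--port", "32000", "jstests/basic1.js"]
    # while a standalone binary entry such as ("test", False) is executed directly:
    argv = ["test"]

after which mongod is pinged to confirm the server survived the test.

+def runTests(tests): + # If we're in one-mongo-per-test mode, we instantiate a nothing + # around the loop, and a mongod inside the loop. + + # FIXME: some suites of tests start their own mongods, so don't + # need this. (So long as there are no conflicts with port, + # dbpath, etc., and so long as we shut ours down properly, + # starting this mongod shouldn't break anything, though.)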
+ with nothing() if oneMongodPerTest else mongod(smallOplog=smallOplog) as master1: + with nothing() if oneMongodPerTest else (mongod(slave=True) if smallOplog else nothing()) as slave1: + for test in tests: + try: + with mongod(smallOplog=smallOplog) if oneMongodPerTest else nothing() as master2: + with mongod(slave=True) if oneMongodPerTest and smallOplog else nothing() as slave2: + runTest(test) + winners.append(test) + if isinstance(slave2, mongod): + checkDbHashes(master2, slave2) + except TestFailure, f: + try: + print f + # Record the failing test and re-raise. + losers[f.path] = f.status + raise f + except TestServerFailure, f: + if not oneMongodPerTest: + return 2 + except TestFailure, f: + if not continueOnFailure: + return 1 + if isinstance(slave1, mongod): + checkDbHashes(master1, slave1) + + return 0 + +def report(): + print "%d test%s succeeded" % (len(winners), '' if len(winners) == 1 else 's') + num_missed = len(tests) - (len(winners) + len(losers.keys())) + if num_missed: + print "%d tests didn't get run" % num_missed + if losers: + print "The following tests failed (with exit code):" + for loser in losers: + print "%s\t%d" % (loser, losers[loser]) + + def missing(lst, src, dst): + if lst: + print """The following collections were present in the %s but not the %s +at the end of testing:""" % (src, dst) + for db in lst: + print db + missing(lost_in_slave, "master", "slave") + missing(lost_in_master, "slave", "master") + if screwy_in_slave: + print """The following collections have different hashes in master and slave +at the end of testing:""" + for db in screwy_in_slave.keys(): + print "%s\t %s" % (db, screwy_in_slave[db]) + if smallOplog and not (lost_in_master or lost_in_slave or screwy_in_slave): + print "replication ok for %d collections" % (len(replicated_dbs)) + if (exit_bad or losers or lost_in_slave or lost_in_master or screwy_in_slave): + status = 1 + else: + status = 0 + exit (status)
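The conditional context managers in runTests above are the "sneaky purpose" the nothing class serves; de-sugared, the outer pair amounts to this (editor's paraphrase, not code from smoke.py):

    if oneMongodPerTest:
        master1_ctx = nothing()                # real mongods start per test, inside the loop
        slave1_ctx = nothing()
    elif smallOplog:
        master1_ctx = mongod(smallOplog=True)  # one shared master...
        slave1_ctx = mongod(slave=True)        # ...replicating to one shared slave
    else:
        master1_ctx = mongod(smallOplog=False)
        slave1_ctx = nothing()

+def expandSuites(suites): + globstr = None + global mongoRepo, tests + for suite in suites: + if suite == 'smokeAll': + tests = [] + expandSuites(['smoke', 'smokePerf', 'smokeClient', 'smokeJs', 'smokeJsPerf', 'smokeJsSlowNightly', 'smokeJsSlowWeekly', 'smokeParallel', 'smokeClone', 'smokeParallel', 'smokeRepl', 'smokeAuth', 'smokeSharding', 'smokeTool']) + break + if suite == 'smoke': + if os.sys.platform == "win32": + program = 'test.exe' + else: + program = 'test' + (globstr, usedb) = (program, False) + elif suite == 'smokePerf': + if os.sys.platform == "win32": + program = 'perftest.exe' + else: + program = 'perftest' + (globstr, usedb) = (program, False) + elif suite == 'smokeJs': + # FIXME: _runner.js seems equivalent to "[!_]*.js".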
+ #(globstr, usedb) = ('_runner.js', True) + (globstr, usedb) = ('[!_]*.js', True) + elif suite == 'smokeQuota': + (globstr, usedb) = ('quota/*.js', True) + elif suite == 'smokeJsPerf': + (globstr, usedb) = ('perf/*.js', True) + elif suite == 'smokeDisk': + (globstr, usedb) = ('disk/*.js', True) + elif suite == 'smokeJsSlowNightly': + (globstr, usedb) = ('slowNightly/*.js', True) + elif suite == 'smokeJsSlowWeekly': + (globstr, usedb) = ('slowWeekly/*.js', True) + elif suite == 'smokeParallel': + (globstr, usedb) = ('parallel/*.js', True) + elif suite == 'smokeClone': + (globstr, usedb) = ('clone/*.js', False) + elif suite == 'smokeRepl': + (globstr, usedb) = ('repl/*.js', False) + elif suite == 'smokeReplSets': + (globstr, usedb) = ('replsets/*.js', False) + elif suite == 'smokeAuth': + (globstr, usedb) = ('auth/*.js', False) + elif suite == 'smokeSharding': + (globstr, usedb) = ('sharding/*.js', False) + elif suite == 'smokeTool': + (globstr, usedb) = ('tool/*.js', False) + # well, the above almost works for everything... + elif suite == 'smokeClient': + paths = ["firstExample", "secondExample", "whereExample", "authTest", "clientTest", "httpClientTest"] + if os.sys.platform == "win32": + paths = [path+'.exe' for path in paths] + # hack + tests += [(testPath and path or os.path.join(mongoRepo, path), False) for path in paths] + elif suite == 'mongosTest': + if os.sys.platform == "win32": + program = 'mongos.exe' + else: + program = 'mongos' + tests += [(os.path.join(mongoRepo, program), False)] + else: + raise Exception('unknown test suite %s' % suite) + + if globstr: + globstr = os.path.join(mongoRepo, (os.path.join(('jstests/' if globstr.endswith('.js') else ''), globstr))) + paths = glob.glob(globstr) + paths.sort() + tests += [(path, usedb) for path in paths] + if not tests: + raise Exception( "no tests found" ) + return tests + +def main(): + parser = OptionParser(usage="usage: smoke.py [OPTIONS] ARGS*") + parser.add_option('--mode', dest='mode', default='suite', + help='If "files", ARGS are filenames; if "suite", ARGS are sets of tests. (default "suite")') + # Some of our tests hard-code pathnames e.g., to execute, so until + # th we don't have the freedom to run from anyplace. +# parser.add_option('--mongo-repo', dest='mongoRepo', default=None, +# help='Top-level directory of mongo checkout to use. 
(default: script will make a guess)') + parser.add_option('--test-path', dest='testPath', default=None, + help="Path to the test executables to run " + "(currently only used for smokeClient)") + parser.add_option('--mongod', dest='mongodExecutable', #default='./mongod', + help='Path to mongod to run (default "./mongod")') + parser.add_option('--port', dest='mongodPort', default="32000", + help='Port the mongod will bind to (default 32000)') + parser.add_option('--mongo', dest='shellExecutable', #default="./mongo", + help='Path to mongo, for .js test files (default "./mongo")') + parser.add_option('--continue-on-failure', dest='continueOnFailure', + action="store_true", default=False, + help='If supplied, continue testing even after a test fails') + parser.add_option('--one-mongod-per-test', dest='oneMongodPerTest', + action="store_true", default=False, + help='If supplied, run each test in a fresh mongod') + parser.add_option('--from-file', dest='File', + help="Run tests/suites named in FILE, one test per line, '-' means stdin") + parser.add_option('--smoke-db-prefix', dest='smokeDbPrefix', default='', + help="Prefix to use for the mongods' dbpaths.") + parser.add_option('--small-oplog', dest='smallOplog', default=False, + action="store_true", + help='Run tests with master/slave replication & use a small oplog') + global tests + (options, tests) = parser.parse_args() + +# global mongoRepo +# if options.mongoRepo: +# pass +# mongoRepo = options.mongoRepo +# else: +# prefix = '' +# while True: +# if os.path.exists(prefix+'buildscripts'): +# mongoRepo = os.path.normpath(prefix) +# break +# else: +# prefix += '../' +# # FIXME: will this be a device's root directory on +# # Windows? +# if os.path.samefile('/', prefix): +# raise Exception("couldn't guess the mongo repository path") + + print tests + + global mongoRepo, mongodExecutable, mongodPort, shellExecutable, continueOnFailure, oneMongodPerTest, smallOplog, smokeDbPrefix, testPath + testPath = options.testPath + mongodExecutable = options.mongodExecutable if options.mongodExecutable else os.path.join(mongoRepo, 'mongod') + mongodPort = options.mongodPort if options.mongodPort else mongodPort + shellExecutable = options.shellExecutable if options.shellExecutable else os.path.join(mongoRepo, 'mongo') + continueOnFailure = options.continueOnFailure if options.continueOnFailure else continueOnFailure + oneMongodPerTest = options.oneMongodPerTest if options.oneMongodPerTest else oneMongodPerTest + smokeDbPrefix = options.smokeDbPrefix + smallOplog = options.smallOplog + + if options.File: + if options.File == '-': + tests = sys.stdin.readlines() + else: + with open(options.File) as f: + tests = f.readlines() + tests = [t.rstrip('\n') for t in tests] + + if not tests: + raise Exception( "no tests specified" ) + # If we're in suite mode, tests is a list of names of sets of tests. 
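For instance (editor's sketch; the file names are hypothetical):

    # ./buildscripts/smoke.py smokeJs
    #   -> expandSuites(['smokeJs']) globs jstests/[!_]*.js under the repo:
    #      [('/repo/jstests/all.js', True), ('/repo/jstests/basic1.js', True), ...]
    # ./buildscripts/smoke.py --mode=files jstests/basic1.js
    #   -> [('/abs/cwd/jstests/basic1.js', True)]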
+ if options.mode == 'suite': + # Suites: smoke, smokePerf, smokeJs, smokeQuota, smokeJsPerf, + # smokeJsSlow, smokeParallel, smokeClone, smokeRepl, smokeDisk + suites = tests + tests = [] + expandSuites(suites) + elif options.mode == 'files': + tests = [(os.path.abspath(test), True) for test in tests] + + runTests(tests) + global exit_bad + exit_bad = False + +atexit.register(report) + +if __name__ == "__main__": + main() diff -Nru mongodb-1.4.4/buildscripts/utils.py mongodb-1.6.3/buildscripts/utils.py --- mongodb-1.4.4/buildscripts/utils.py 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/buildscripts/utils.py 2010-09-24 10:02:42.000000000 -0700 @@ -2,9 +2,50 @@ import re import socket import time - +import os # various utilities that are handy +def getGitBranch(): + if not os.path.exists( ".git" ): + return None + + version = open( ".git/HEAD" ,'r' ).read().strip() + if not version.startswith( "ref: " ): + return version + version = version.split( "/" ) + version = version[len(version)-1] + return version + +def getGitBranchString( prefix="" , postfix="" ): + t = re.compile( '[/\\\]' ).split( os.getcwd() ) + if len(t) > 2 and t[len(t)-1] == "mongo": + par = t[len(t)-2] + m = re.compile( ".*_([vV]\d+\.\d+)$" ).match( par ) + if m is not None: + return prefix + m.group(1).lower() + postfix + if par.find("Nightly") > 0: + return "" + + + b = getGitBranch() + if b == None or b == "master": + return "" + return prefix + b + postfix + +def getGitVersion(): + if not os.path.exists( ".git" ): + return "nogitversion" + + version = open( ".git/HEAD" ,'r' ).read().strip() + if not version.startswith( "ref: " ): + return version + version = version[5:] + f = ".git/" + version + if not os.path.exists( f ): + return version + return open( f , 'r' ).read().strip() + + def execsys( args ): import subprocess if isinstance( args , str ): @@ -24,6 +65,40 @@ r = re.compile( "[\r\n]+" ) return r.split( raw ) + +def removeIfInList( lst , thing ): + if thing in lst: + lst.remove( thing ) + +def findVersion( root , choices ): + for c in choices: + if ( os.path.exists( root + c ) ): + return root + c + raise "can't find a version of [" + root + "] choices: " + choices + +def choosePathExist( choices , default=None): + for c in choices: + if c != None and os.path.exists( c ): + return c + return default + +def filterExists(paths): + return filter(os.path.exists, paths) + +def ensureDir( name ): + d = os.path.dirname( name ) + if not os.path.exists( d ): + print( "Creating dir: " + name ); + os.makedirs( d ) + if not os.path.exists( d ): + raise "Failed to create dir: " + name + + +def distinctAsString( arr ): + s = set() + for x in arr: + s.add( str(x) ) + return list(s) def checkMongoPort( port=27017 ): sock = socket.socket() @@ -32,6 +107,7 @@ sock.connect(("localhost", port)) sock.close() + def didMongodStart( port=27017 , timeout=20 ): while timeout > 0: time.sleep( 1 ) @@ -41,7 +117,5 @@ except Exception,e: print( e ) timeout = timeout - 1 - return False -
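getGitVersion and getGitBranch above resolve .git/HEAD by hand rather than shelling out to git; an illustration of the two cases they handle (editor's sketch; the branch name and hash are hypothetical):

    import utils                  # buildscripts/utils.py, run from a mongo checkout
    # on a branch, .git/HEAD holds "ref: refs/heads/v1.6", which is chased
    # through .git/refs/heads/v1.6; detached, it holds the commit hash itself
    print utils.getGitBranch()    # e.g. "v1.6"; None when .git is absent
    print utils.getGitVersion()   # e.g. "87ec01ea..."; "nogitversion" without .git

diff -Nru mongodb-1.4.4/client/clientOnly.cpp mongodb-1.6.3/client/clientOnly.cpp --- mongodb-1.4.4/client/clientOnly.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/clientOnly.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,10 +15,11 @@ * limitations under the License.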
*/ -#include "stdafx.h" +#include "pch.h" #include "../client/dbclient.h" #include "../db/dbhelpers.h" #include "../db/cmdline.h" +#include "../s/shard.h" namespace mongo { @@ -57,10 +58,15 @@ uassert( 10256 , "no createDirectClient in clientOnly" , 0 ); return 0; } -/* - auto_ptr Helpers::find( const char *ns , BSONObj query , bool requireIndex ){ - uassert( 10000 , "Helpers::find can't be used in client" , 0 ); - return auto_ptr(0); + + void Shard::getAllShards( vector& all ){ + assert(0); + } + + bool Shard::isAShard( const string& ident ){ + assert(0); + return false; } -*/ + + } diff -Nru mongodb-1.4.4/client/connpool.cpp mongodb-1.6.3/client/connpool.cpp --- mongodb-1.4.4/client/connpool.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/connpool.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -18,76 +18,99 @@ // _ todo: reconnect? -#include "stdafx.h" +#include "pch.h" #include "connpool.h" #include "../db/commands.h" #include "syncclusterconnection.h" +#include "../s/shard.h" namespace mongo { DBConnectionPool pool; + DBClientBase* DBConnectionPool::_get(const string& ident) { + scoped_lock L(_mutex); + + PoolForHost& p = _pools[ident]; + if ( p.pool.empty() ) + return 0; + + DBClientBase *c = p.pool.top(); + p.pool.pop(); + return c; + } + + DBClientBase* DBConnectionPool::_finishCreate( const string& host , DBClientBase* conn ){ + { + scoped_lock L(_mutex); + PoolForHost& p = _pools[host]; + p.created++; + } + + onCreate( conn ); + onHandedOut( conn ); + + return conn; + } + + DBClientBase* DBConnectionPool::get(const ConnectionString& url) { + DBClientBase * c = _get( url.toString() ); + if ( c ){ + onHandedOut( c ); + return c; + } + + string errmsg; + c = url.connect( errmsg ); + uassert( 13328 , (string)"dbconnectionpool: connect failed " + url.toString() + " : " + errmsg , c ); + + return _finishCreate( url.toString() , c ); + } + DBClientBase* DBConnectionPool::get(const string& host) { - scoped_lock L(poolMutex); + DBClientBase * c = _get( host ); + if ( c ){ + onHandedOut( c ); + return c; + } - PoolForHost *&p = pools[host]; - if ( p == 0 ) - p = new PoolForHost(); - if ( p->pool.empty() ) { - int numCommas = DBClientBase::countCommas( host ); - DBClientBase *c; - - if( numCommas == 0 ) { - DBClientConnection *cc = new DBClientConnection(true); - log(2) << "creating new connection for pool to:" << host << endl; - string errmsg; - if ( !cc->connect(host.c_str(), errmsg) ) { - delete cc; - uassert( 11002 , (string)"dbconnectionpool: connect failed " + host , false); - return 0; - } - c = cc; - onCreate( c ); - } - else if ( numCommas == 1 ) { - DBClientPaired *p = new DBClientPaired(); - if( !p->connect(host) ) { - delete p; - uassert( 11003 , (string)"dbconnectionpool: connect failed [2] " + host , false); - return 0; - } - c = p; - } - else if ( numCommas == 2 ) { - c = new SyncClusterConnection( host ); - } - else { - uassert( 13071 , (string)"invalid hostname [" + host + "]" , 0 ); + string errmsg; + ConnectionString cs = ConnectionString::parse( host , errmsg ); + uassert( 13071 , (string)"invalid hostname [" + host + "]" + errmsg , cs.isValid() ); + + c = cs.connect( errmsg ); + uassert( 11002 , (string)"dbconnectionpool: connect failed " + host + " : " + errmsg , c ); + return _finishCreate( host , c ); + } + + DBConnectionPool::~DBConnectionPool(){ + for ( map::iterator i = _pools.begin(); i != _pools.end(); i++ ){ + PoolForHost& p = i->second; + + while ( ! 
p.pool.empty() ){ + DBClientBase * c = p.pool.top(); + delete c; + p.pool.pop(); } - return c; } - DBClientBase *c = p->pool.top(); - p->pool.pop(); - onHandedOut( c ); - return c; } void DBConnectionPool::flush(){ - scoped_lock L(poolMutex); - for ( map<string,PoolForHost*>::iterator i = pools.begin(); i != pools.end(); i++ ){ - PoolForHost* p = i->second; + scoped_lock L(_mutex); + for ( map<string,PoolForHost>::iterator i = _pools.begin(); i != _pools.end(); i++ ){ + PoolForHost& p = i->second; vector<DBClientBase*> all; - while ( ! p->pool.empty() ){ - DBClientBase * c = p->pool.top(); - p->pool.pop(); + while ( ! p.pool.empty() ){ + DBClientBase * c = p.pool.top(); + p.pool.pop(); all.push_back( c ); bool res; c->isMaster( res ); } for ( vector<DBClientBase*>::iterator i=all.begin(); i != all.end(); i++ ){ - p->pool.push( *i ); + p.pool.push( *i ); } } } @@ -114,6 +137,26 @@ } } + void DBConnectionPool::appendInfo( BSONObjBuilder& b ){ + scoped_lock lk( _mutex ); + BSONObjBuilder bb( b.subobjStart( "hosts" ) ); + for ( map<string,PoolForHost>::iterator i=_pools.begin(); i!=_pools.end(); ++i ){ + string s = i->first; + BSONObjBuilder temp( bb.subobjStart( s.c_str() ) ); + temp.append( "available" , (int)(i->second.pool.size()) ); + temp.appendNumber( "created" , i->second.created ); + temp.done(); + } + bb.done(); + } + + ScopedDbConnection * ScopedDbConnection::steal(){ + assert( _conn ); + ScopedDbConnection * n = new ScopedDbConnection( _host , _conn ); + _conn = 0; + return n; + } + ScopedDbConnection::~ScopedDbConnection() { if ( _conn ){ if ( ! _conn->isFailed() ) { @@ -124,20 +167,44 @@ } } + ScopedDbConnection::ScopedDbConnection(const Shard& shard ) + : _host( shard.getConnString() ) , _conn( pool.get(_host) ){ + } + + ScopedDbConnection::ScopedDbConnection(const Shard* shard ) + : _host( shard->getConnString() ) , _conn( pool.get(_host) ){ + } + class PoolFlushCmd : public Command { public: - PoolFlushCmd() : Command( "connpoolsync" ){} - virtual LockType locktype(){ return NONE; } - virtual bool run(const char*, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool){ + PoolFlushCmd() : Command( "connPoolSync" , false , "connpoolsync" ){} + virtual void help( stringstream &help ) const { help<<"internal"; } + virtual LockType locktype() const { return NONE; } + virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool){ pool.flush(); - result << "ok" << 1; return true; } - virtual bool slaveOk(){ + virtual bool slaveOk() const { return true; } } poolFlushCmd; + class PoolStats : public Command { + public: + PoolStats() : Command( "connPoolStats" ){} + virtual void help( stringstream &help ) const { help<<"stats about connection pool"; } + virtual LockType locktype() const { return NONE; } + virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool){ + pool.appendInfo( result ); + return true; + } + virtual bool slaveOk() const { + return true; + } + + } poolStatsCmd; + + } // namespace mongo diff -Nru mongodb-1.4.4/client/connpool.h mongodb-1.6.3/client/connpool.h --- mongodb-1.4.4/client/connpool.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/connpool.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,20 +19,30 @@ #include <stack> #include "dbclient.h" +#include "redef_macros.h" namespace mongo { + class Shard; + struct PoolForHost { + PoolForHost() + : created(0){} + PoolForHost( const PoolForHost& other ){ + assert(other.pool.size() == 0); + created = other.created; + assert( created == 0 ); + } + + std::stack<DBClientBase*> pool; + long long created; };
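The per-host stack and "created" counter are what feed the new connPoolStats command above; the document appendInfo builds looks roughly like this (editor's sketch, host names and counts hypothetical):

    # shape of the connPoolStats output, written as a Python literal:
    {"hosts": {"shard1.example.net:27018": {"available": 2, "created": 7},
               "config.example.net:27019": {"available": 1, "created": 1}}}

class DBConnectionHook {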
public: virtual ~DBConnectionHook(){} - virtual void onCreate( DBClientBase * conn ){} virtual void onHandedOut( DBClientBase * conn ){} - }; /** Database connection pool. @@ -51,33 +61,54 @@ } */ class DBConnectionPool { - mongo::mutex poolMutex; - map pools; // servername -> pool + mongo::mutex _mutex; + map _pools; // servername -> pool list _hooks; + + DBClientBase* _get( const string& ident ); + DBClientBase* _finishCreate( const string& ident , DBClientBase* conn ); + + public: + DBConnectionPool() : _mutex("DBConnectionPool") { } + ~DBConnectionPool(); + + void onCreate( DBClientBase * conn ); void onHandedOut( DBClientBase * conn ); - public: + void flush(); + DBClientBase *get(const string& host); + DBClientBase *get(const ConnectionString& host); + void release(const string& host, DBClientBase *c) { if ( c->isFailed() ){ delete c; return; } - scoped_lock L(poolMutex); - pools[host]->pool.push(c); + scoped_lock L(_mutex); + _pools[host].pool.push(c); } void addHook( DBConnectionHook * hook ); + void appendInfo( BSONObjBuilder& b ); }; - + extern DBConnectionPool pool; + class AScopedConnection : boost::noncopyable { + public: + virtual ~AScopedConnection(){} + virtual DBClientBase* get() = 0; + virtual void done() = 0; + virtual string getHost() const = 0; + }; + /** Use to get a connection from the pool. On exceptions things clean up nicely. */ - class ScopedDbConnection { - const string host; + class ScopedDbConnection : public AScopedConnection { + const string _host; DBClientBase *_conn; public: /** get the associated connection object */ @@ -85,19 +116,42 @@ uassert( 11004 , "did you call done already" , _conn ); return _conn; } - + /** get the associated connection object */ DBClientBase& conn() { uassert( 11005 , "did you call done already" , _conn ); return *_conn; } + /** get the associated connection object */ + DBClientBase* get() { + uassert( 13102 , "did you call done already" , _conn ); + return _conn; + } + + ScopedDbConnection() + : _host( "" ) , _conn(0) { + } + /** throws UserException if can't connect */ - ScopedDbConnection(const string& _host) : - host(_host), _conn( pool.get(_host) ) { - //cout << " for: " << _host << " got conn: " << _conn << endl; + ScopedDbConnection(const string& host) + : _host(host), _conn( pool.get(host) ) { + } + + ScopedDbConnection(const string& host, DBClientBase* conn ) + : _host( host ) , _conn( conn ){ + } + + ScopedDbConnection(const Shard& shard ); + ScopedDbConnection(const Shard* shard ); + + ScopedDbConnection(const ConnectionString& url ) + : _host(url.toString()), _conn( pool.get(url) ) { } + + string getHost() const { return _host; } + /** Force closure of the connection. You should call this if you leave it in a bad state. Destructor will do this too, but it is verbose. */ @@ -121,12 +175,16 @@ kill(); else */ - pool.release(host, _conn); + pool.release(_host, _conn); _conn = 0; } + ScopedDbConnection * steal(); + ~ScopedDbConnection(); }; } // namespace mongo + +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/constants.h mongodb-1.6.3/client/constants.h --- mongodb-1.4.4/client/constants.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/constants.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,26 @@ +// constants.h + +#pragma once + +namespace mongo { + + /* query results include a 32 result flag word consisting of these bits */ + enum ResultFlagType { + /* returned, with zero results, when getMore is called but the cursor id + is not valid at the server. 
*/ + ResultFlag_CursorNotFound = 1, + + /* { $err : ... } is being returned */ + ResultFlag_ErrSet = 2, + + /* Have to update config from the server, usually $err is also set */ + ResultFlag_ShardConfigStale = 4, + + /* for backward compatibility: this lets us know the server supports + the QueryOption_AwaitData option. if it doesn't, a repl slave client should sleep + a little between getMore's. + */ + ResultFlag_AwaitCapable = 8 + }; + +} diff -Nru mongodb-1.4.4/client/dbclient.cpp mongodb-1.6.3/client/dbclient.cpp --- mongodb-1.4.4/client/dbclient.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/dbclient.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,22 +15,89 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "../db/pdfile.h" #include "dbclient.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include "../db/jsobj.h" #include "../db/json.h" #include "../db/instance.h" #include "../util/md5.hpp" #include "../db/dbmessage.h" #include "../db/cmdline.h" +#include "connpool.h" +#include "../s/util.h" +#include "syncclusterconnection.h" namespace mongo { + DBClientBase* ConnectionString::connect( string& errmsg ) const { + switch ( _type ){ + case MASTER: { + DBClientConnection * c = new DBClientConnection(true); + log(1) << "creating new connection to:" << _servers[0] << endl; + if ( ! c->connect( _servers[0] , errmsg ) ) { + delete c; + return 0; + } + return c; + } + + case PAIR: + case SET: { + DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers ); + if( ! set->connect() ){ + delete set; + errmsg = "connect failed to set "; + errmsg += toString(); + return 0; + } + return set; + } + + case SYNC: { + // TODO , don't copy + list<HostAndPort> l; + for ( unsigned i=0; i<_servers.size(); i++ ) + l.push_back( _servers[i] ); + return new SyncClusterConnection( l ); + } + + case INVALID: + throw UserException( 13421 , "trying to connect to invalid ConnectionString" ); + break; + } + + assert( 0 ); + return 0; + } + + ConnectionString ConnectionString::parse( const string& host , string& errmsg ){ + + string::size_type i = host.find( '/' ); + if ( i != string::npos ){ + // replica set + return ConnectionString( SET , host.substr( i + 1 ) , host.substr( 0 , i ) ); + } + + int numCommas = DBClientBase::countCommas( host ); + + if( numCommas == 0 ) + return ConnectionString( HostAndPort( host ) ); + + if ( numCommas == 1 ) + return ConnectionString( PAIR , host ); + + if ( numCommas == 2 ) + return ConnectionString( SYNC , host ); + + errmsg = (string)"invalid hostname [" + host + "]"; + return ConnectionString(); // INVALID + }
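parse() keys entirely off punctuation in the host string; a compact paraphrase of its dispatch (editor's sketch in Python, not driver code):

    # "name/h1:p,h2:p" -> SET (replica set "name"); "host" -> MASTER;
    # "h1,h2" -> PAIR; "h1,h2,h3" -> SYNC; anything else -> INVALID
    def parse_kind(host):
        if '/' in host:
            return 'SET'
        return {0: 'MASTER', 1: 'PAIR', 2: 'SYNC'}.get(host.count(','), 'INVALID')

Query& Query::where(const string &jscode, BSONObj scope) { /* use where() before sort() and hint() and explain(), else this will assert. */ - assert( !obj.hasField("query") ); + assert( ! isComplex() ); BSONObjBuilder b; b.appendElements(obj); b.appendWhere(jscode, scope); @@ -39,7 +106,7 @@ } void Query::makeComplex() { - if ( obj.hasElement( "query" ) ) + if ( isComplex() ) return; BSONObjBuilder b; b.append( "query", obj ); @@ -76,19 +143,36 @@ return *this; } - bool Query::isComplex() const{ - return obj.hasElement( "query" ); + bool Query::isComplex( bool * hasDollar ) const{ + if ( obj.hasElement( "query" ) ){ + if ( hasDollar ) + hasDollar[0] = false; + return true; + } + + if ( obj.hasElement( "$query" ) ){ + if ( hasDollar ) + hasDollar[0] = true; + return true; + } + + return false; } BSONObj Query::getFilter() const { - if ( ! isComplex() ) + bool hasDollar; + if ( !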
isComplex( &hasDollar ) ) return obj; - return obj.getObjectField( "query" ); + + return obj.getObjectField( hasDollar ? "$query" : "query" ); } BSONObj Query::getSort() const { if ( ! isComplex() ) return BSONObj(); - return obj.getObjectField( "orderby" ); + BSONObj ret = obj.getObjectField( "orderby" ); + if (ret.isEmpty()) + ret = obj.getObjectField( "$orderby" ); + return ret; } BSONObj Query::getHint() const { if ( ! isComplex() ) @@ -109,6 +193,17 @@ return o["ok"].trueValue(); } + enum QueryOptions DBClientWithCommands::availableOptions() { + if ( !_haveCachedAvailableOptions ) { + BSONObj ret; + if ( runCommand( "admin", BSON( "availablequeryoptions" << 1 ), ret ) ) { + _cachedAvailableOptions = ( enum QueryOptions )( ret.getIntField( "options" ) ); + } + _haveCachedAvailableOptions = true; + } + return _cachedAvailableOptions; + } + inline bool DBClientWithCommands::runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options) { string ns = dbname + ".$cmd"; info = findOne(ns, cmd, 0 , options); @@ -133,7 +228,7 @@ BSONObj res; if( !runCommand(ns.db.c_str(), cmd, res, options) ) uasserted(11010,string("count fails:") + res.toString()); - return res.getIntField("n"); + return res["n"].numberLong(); } BSONObj getlasterrorcmdobj = fromjson("{getlasterror:1}"); @@ -146,10 +241,14 @@ string DBClientWithCommands::getLastError() { BSONObj info = getLastErrorDetailed(); + return getLastErrorString( info ); + } + + string DBClientWithCommands::getLastErrorString( const BSONObj& info ){ BSONElement e = info["err"]; if( e.eoo() ) return ""; if( e.type() == Object ) return e.toString(); - return e.str(); + return e.str(); } BSONObj getpreverrorcmdobj = fromjson("{getpreverror:1}"); @@ -223,13 +322,14 @@ bool DBClientWithCommands::isMaster(bool& isMaster, BSONObj *info) { BSONObj o; - if ( info == 0 ) info = &o; + if ( info == 0 ) + info = &o; bool ok = runCommand("admin", ismastercmdobj, *info); - isMaster = (info->getIntField("ismaster") == 1); + isMaster = info->getField("ismaster").trueValue(); return ok; } - bool DBClientWithCommands::createCollection(const string &ns, unsigned size, bool capped, int max, BSONObj *info) { + bool DBClientWithCommands::createCollection(const string &ns, long long size, bool capped, int max, BSONObj *info) { BSONObj o; if ( info == 0 ) info = &o; BSONObjBuilder b; @@ -346,64 +446,9 @@ string db = nsGetDB( ns ) + ".system.namespaces"; BSONObj q = BSON( "name" << ns ); - return count( db.c_str() , q ); - } - - - void testSort() { - DBClientConnection c; - string err; - if ( !c.connect("localhost", err) ) { - out() << "can't connect to server " << err << endl; - return; - } - - cout << "findOne returns:" << endl; - cout << c.findOne("test.foo", QUERY( "x" << 3 ) ).toString() << endl; - cout << c.findOne("test.foo", QUERY( "x" << 3 ).sort("name") ).toString() << endl; - + return count( db.c_str() , q ) != 0; } - /* TODO: unit tests should run this? 
*/ - void testDbEval() { - DBClientConnection c; - string err; - if ( !c.connect("localhost", err) ) { - out() << "can't connect to server " << err << endl; - return; - } - - if( !c.auth("dwight", "u", "p", err) ) { - out() << "can't authenticate " << err << endl; - return; - } - - BSONObj info; - BSONElement retValue; - BSONObjBuilder b; - b.append("0", 99); - BSONObj args = b.done(); - bool ok = c.eval("dwight", "function() { return args[0]; }", info, retValue, &args); - out() << "eval ok=" << ok << endl; - out() << "retvalue=" << retValue.toString() << endl; - out() << "info=" << info.toString() << endl; - - out() << endl; - - int x = 3; - assert( c.eval("dwight", "function() { return 3; }", x) ); - - out() << "***\n"; - - BSONObj foo = fromjson("{\"x\":7}"); - out() << foo.toString() << endl; - int res=0; - ok = c.eval("dwight", "function(parm1) { return parm1.x; }", foo, res); - out() << ok << " retval:" << res << endl; - } - - void testPaired(); - /* --- dbclientconnection --- */ bool DBClientConnection::auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword) { @@ -422,48 +467,42 @@ return DBClientBase::auth(dbname, username, password.c_str(), errmsg, false); } - BSONObj DBClientInterface::findOne(const string &ns, Query query, const BSONObj *fieldsToReturn, int queryOptions) { + BSONObj DBClientInterface::findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions) { auto_ptr c = this->query(ns, query, 1, 0, fieldsToReturn, queryOptions); - massert( 10276 , "DBClientBase::findOne: transport error", c.get() ); + uassert( 10276 , "DBClientBase::findOne: transport error", c.get() ); + + if ( c->hasResultFlag( ResultFlag_ShardConfigStale ) ) + throw StaleConfigException( ns , "findOne has stale config" ); if ( !c->more() ) return BSONObj(); - return c->next().copy(); + return c->nextSafe().copy(); } - bool DBClientConnection::connect(const string &_serverAddress, string& errmsg) { - serverAddress = _serverAddress; + bool DBClientConnection::connect(const HostAndPort& server, string& errmsg){ + _server = server; + _serverString = _server.toString(); + return _connect( errmsg ); + } - string ip; - int port; - size_t idx = serverAddress.find( ":" ); - if ( idx != string::npos ) { - port = strtol( serverAddress.substr( idx + 1 ).c_str(), 0, 10 ); - ip = serverAddress.substr( 0 , idx ); - ip = hostbyname(ip.c_str()); - } else { - port = CmdLine::DefaultDBPort; - ip = hostbyname( serverAddress.c_str() ); - } - if( ip.empty() ) { - stringstream ss; - ss << "client connect: couldn't parse/resolve hostname: " << _serverAddress; - errmsg = ss.str(); + bool DBClientConnection::_connect( string& errmsg ){ + _serverString = _server.toString(); + // we keep around SockAddr for connection life -- maybe MessagingPort + // requires that? + server.reset(new SockAddr(_server.host().c_str(), _server.port())); + p.reset(new MessagingPort( _timeout, _logLevel )); + + if (server->getAddr() == "0.0.0.0"){ failed = true; return false; } - // we keep around SockAddr for connection life -- maybe MessagingPort - // requires that? 
- server = auto_ptr(new SockAddr(ip.c_str(), port)); - p = auto_ptr(new MessagingPort()); - if ( !p->connect(*server) ) { stringstream ss; - ss << "couldn't connect to server " << serverAddress << " " << ip << ":" << port; + ss << "couldn't connect to server " << _serverString; errmsg = ss.str(); failed = true; return false; @@ -480,22 +519,21 @@ return; lastReconnectTry = time(0); - log() << "trying reconnect to " << serverAddress << endl; + log(_logLevel) << "trying reconnect to " << _serverString << endl; string errmsg; - string tmp = serverAddress; failed = false; - if ( !connect(tmp.c_str(), errmsg) ) { - log() << "reconnect " << serverAddress << " failed " << errmsg << endl; + if ( ! _connect(errmsg) ) { + log(_logLevel) << "reconnect " << _serverString << " failed " << errmsg << endl; return; } - log() << "reconnect " << serverAddress << " ok" << endl; + log(_logLevel) << "reconnect " << _serverString << " ok" << endl; for( map< string, pair >::iterator i = authCache.begin(); i != authCache.end(); i++ ) { const char *dbname = i->first.c_str(); const char *username = i->second.first.c_str(); const char *password = i->second.second.c_str(); if( !DBClientBase::auth(dbname, username, password, errmsg, false) ) - log() << "reconnect: auth failed db:" << dbname << " user:" << username << ' ' << errmsg << '\n'; + log(_logLevel) << "reconnect: auth failed db:" << dbname << " user:" << username << ' ' << errmsg << '\n'; } } @@ -516,13 +554,76 @@ return auto_ptr< DBClientCursor >( 0 ); } + struct DBClientFunConvertor { + void operator()( DBClientCursorBatchIterator &i ) { + while( i.moreInCurrentBatch() ) { + _f( i.nextSafe() ); + } + } + boost::function _f; + }; + + unsigned long long DBClientConnection::query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn, int queryOptions ) { + DBClientFunConvertor fun; + fun._f = f; + boost::function ptr( fun ); + return DBClientConnection::query( ptr, ns, query, fieldsToReturn, queryOptions ); + } + + unsigned long long DBClientConnection::query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn, int queryOptions ) { + // mask options + queryOptions &= (int)( QueryOption_NoCursorTimeout | QueryOption_SlaveOk ); + unsigned long long n = 0; + + bool doExhaust = ( availableOptions() & QueryOption_Exhaust ); + if ( doExhaust ) { + queryOptions |= (int)QueryOption_Exhaust; + } + auto_ptr c( this->query(ns, query, 0, 0, fieldsToReturn, queryOptions) ); + massert( 13386, "socket error for mapping query", c.get() ); + + if ( !doExhaust ) { + while( c->more() ) { + DBClientCursorBatchIterator i( *c ); + f( i ); + n += i.n(); + } + return n; + } + + try { + while( 1 ) { + while( c->moreInCurrentBatch() ) { + DBClientCursorBatchIterator i( *c ); + f( i ); + n += i.n(); + } + + if( c->getCursorId() == 0 ) + break; + + c->exhaustReceiveMore(); + } + } + catch(std::exception&) { + /* connection CANNOT be used anymore as more data may be on the way from the server. + we have to reconnect. 
+ */ + failed = true; + p->shutdown(); + throw; + } + + return n; + } + void DBClientBase::insert( const string & ns , BSONObj obj ) { Message toSend; BufBuilder b; int opts = 0; - b.append( opts ); - b.append( ns ); + b.appendNum( opts ); + b.appendStr( ns ); obj.appendSelfToBufBuilder( b ); toSend.setData( dbInsert , b.buf() , b.len() ); @@ -535,8 +636,8 @@ BufBuilder b; int opts = 0; - b.append( opts ); - b.append( ns ); + b.appendNum( opts ); + b.appendStr( ns ); for( vector< BSONObj >::const_iterator i = v.begin(); i != v.end(); ++i ) i->appendSelfToBufBuilder( b ); @@ -550,13 +651,13 @@ BufBuilder b; int opts = 0; - b.append( opts ); - b.append( ns ); + b.appendNum( opts ); + b.appendStr( ns ); int flags = 0; if ( justOne ) - flags |= 1; - b.append( flags ); + flags |= RemoveOption_JustOne; + b.appendNum( flags ); obj.obj.appendSelfToBufBuilder( b ); @@ -568,13 +669,13 @@ void DBClientBase::update( const string & ns , Query query , BSONObj obj , bool upsert , bool multi ) { BufBuilder b; - b.append( (int)0 ); // reserved - b.append( ns ); + b.appendNum( (int)0 ); // reserved + b.appendStr( ns ); int flags = 0; if ( upsert ) flags |= UpdateOption_Upsert; if ( multi ) flags |= UpdateOption_Multi; - b.append( flags ); + b.appendNum( flags ); query.obj.appendSelfToBufBuilder( b ); obj.appendSelfToBufBuilder( b ); @@ -599,7 +700,7 @@ if ( ! runCommand( nsToDatabase( ns.c_str() ) , BSON( "deleteIndexes" << NamespaceString( ns ).coll << "index" << indexName ) , info ) ){ - log() << "dropIndex failed: " << info << endl; + log(_logLevel) << "dropIndex failed: " << info << endl; uassert( 10007 , "dropIndex failed" , 0 ); } resetIndexCache(); @@ -684,15 +785,21 @@ /* -- DBClientCursor ---------------------------------------------- */ +#ifdef _DEBUG +#define CHECK_OBJECT( o , msg ) massert( 10337 , (string)"object not valid" + (msg) , (o).isValid() ) +#else +#define CHECK_OBJECT( o , msg ) +#endif + void assembleRequest( const string &ns, BSONObj query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, Message &toSend ) { CHECK_OBJECT( query , "assembleRequest query" ); // see query.h for the protocol we are using here. BufBuilder b; int opts = queryOptions; - b.append(opts); - b.append(ns.c_str()); - b.append(nToSkip); - b.append(nToReturn); + b.appendNum(opts); + b.appendStr(ns); + b.appendNum(nToSkip); + b.appendNum(nToReturn); query.appendSelfToBufBuilder(b); if ( fieldsToReturn ) fieldsToReturn->appendSelfToBufBuilder(b); @@ -713,6 +820,10 @@ port().piggyBack( toSend ); } + void DBClientConnection::recv( Message &m ) { + port().recv(m); + } + bool DBClientConnection::call( Message &toSend, Message &response, bool assertOk ) { /* todo: this is very ugly messagingport::call returns an error code AND can throw an exception. we should make it return void and just throw an exception anytime @@ -722,7 +833,7 @@ if ( !port().call(toSend, response) ) { failed = true; if ( assertOk ) - massert( 10278 , "dbclient error communicating with server", false); + uassert( 10278 , "dbclient error communicating with server", false); return false; } } @@ -736,178 +847,128 @@ void DBClientConnection::checkResponse( const char *data, int nReturned ) { /* check for errors. 
the only one we really care about at this stage is "not master" */ - if ( clientPaired && nReturned ) { + if ( clientSet && nReturned ) { + assert(data); BSONObj o(data); BSONElement e = o.firstElement(); if ( strcmp(e.fieldName(), "$err") == 0 && e.type() == String && strncmp(e.valuestr(), "not master", 10) == 0 ) { - clientPaired->isntMaster(); + clientSet->isntMaster(); } } } - int DBClientCursor::nextBatchSize(){ - if ( nToReturn == 0 ) - return batchSize; - if ( batchSize == 0 ) - return nToReturn; + void DBClientConnection::killCursor( long long cursorId ){ + BufBuilder b; + b.appendNum( (int)0 ); // reserved + b.appendNum( (int)1 ); // number + b.appendNum( cursorId ); - return batchSize < nToReturn ? batchSize : nToReturn; - } - - bool DBClientCursor::init() { - Message toSend; - if ( !cursorId ) { - assembleRequest( ns, query, nextBatchSize() , nToSkip, fieldsToReturn, opts, toSend ); - } else { - BufBuilder b; - b.append( opts ); - b.append( ns.c_str() ); - b.append( nToReturn ); - b.append( cursorId ); - toSend.setData( dbGetMore, b.buf(), b.len() ); - } - if ( !connector->call( toSend, *m, false ) ) - return false; - if ( ! m->data ) - return false; - dataReceived(); - return true; + Message m; + m.setData( dbKillCursors , b.buf() , b.len() ); + + sayPiggyBack( m ); } - void DBClientCursor::requestMore() { - assert( cursorId && pos == nReturned ); - - if (haveLimit){ - nToReturn -= nReturned; - assert(nToReturn > 0); - } - BufBuilder b; - b.append(opts); - b.append(ns.c_str()); - b.append(nextBatchSize()); - b.append(cursorId); + /* --- class dbclientpaired --- */ - Message toSend; - toSend.setData(dbGetMore, b.buf(), b.len()); - auto_ptr response(new Message()); - connector->call( toSend, *response ); - - m = response; - dataReceived(); - } - - void DBClientCursor::dataReceived() { - QueryResult *qr = (QueryResult *) m->data; - resultFlags = qr->resultFlags(); - if ( qr->resultFlags() & QueryResult::ResultFlag_CursorNotFound ) { - // cursor id no longer valid at the server. - assert( qr->cursorId == 0 ); - cursorId = 0; // 0 indicates no longer valid (dead) - // TODO: should we throw a UserException here??? - } - if ( cursorId == 0 || ! ( opts & QueryOption_CursorTailable ) ) { - // only set initially: we don't want to kill it on end of data - // if it's a tailable cursor - cursorId = qr->cursorId; - } - nReturned = qr->nReturned; - pos = 0; - data = qr->data(); - - connector->checkResponse( data, nReturned ); - /* this assert would fire the way we currently work: - assert( nReturned || cursorId == 0 ); - */ + string DBClientReplicaSet::toString() { + return getServerAddress(); } - /** If true, safe to call next(). Requests more from server if necessary. 
*/ - bool DBClientCursor::more() { - if ( !_putBack.empty() ) - return true; + DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector& servers ) + : _name( name ) , _currentMaster( 0 ), _servers( servers ){ - if (haveLimit && pos >= nToReturn) - return false; - - if ( pos < nReturned ) - return true; - - if ( cursorId == 0 ) - return false; - - requestMore(); - return pos < nReturned; - } - - BSONObj DBClientCursor::next() { - assert( more() ); - if ( !_putBack.empty() ) { - BSONObj ret = _putBack.top(); - _putBack.pop(); - return ret; - } - pos++; - BSONObj o(data); - data += o.objsize(); - return o; + for ( unsigned i=0; i<_servers.size(); i++ ) + _conns.push_back( new DBClientConnection( true , this ) ); } - - DBClientCursor::~DBClientCursor() { - DESTRUCTOR_GUARD ( - if ( cursorId && _ownCursor ) { - BufBuilder b; - b.append( (int)0 ); // reserved - b.append( (int)1 ); // number - b.append( cursorId ); - - Message m; - m.setData( dbKillCursors , b.buf() , b.len() ); - - connector->sayPiggyBack( m ); - } - ); + + DBClientReplicaSet::~DBClientReplicaSet(){ + for ( unsigned i=0; i<_conns.size(); i++ ) + delete _conns[i]; + _conns.clear(); } - - /* --- class dbclientpaired --- */ - - string DBClientPaired::toString() { - stringstream ss; - ss << "state: " << master << '\n'; - ss << "left: " << left.toStringLong() << '\n'; - ss << "right: " << right.toStringLong() << '\n'; + + string DBClientReplicaSet::getServerAddress() const { + StringBuilder ss; + if ( _name.size() ) + ss << _name << "/"; + + for ( unsigned i=0; i<_servers.size(); i++ ){ + if ( i > 0 ) + ss << ","; + ss << _servers[i].toString(); + } return ss.str(); } -#pragma warning(disable: 4355) - DBClientPaired::DBClientPaired() : - left(true, this), right(true, this) - { - master = NotSetL; - } -#pragma warning(default: 4355) - /* find which server, the left or right, is currently master mode */ - void DBClientPaired::_checkMaster() { + void DBClientReplicaSet::_checkMaster() { + + bool triedQuickCheck = false; + + log( _logLevel + 1) << "_checkMaster on: " << toString() << endl; for ( int retry = 0; retry < 2; retry++ ) { - int x = master; - for ( int pass = 0; pass < 2; pass++ ) { - DBClientConnection& c = x == 0 ? left : right; + for ( unsigned i=0; i<_conns.size(); i++ ){ + DBClientConnection * c = _conns[i]; try { bool im; BSONObj o; - c.isMaster(im, &o); + c->isMaster(im, &o); + if ( retry ) - log() << "checkmaster: " << c.toString() << ' ' << o.toString() << '\n'; + log(_logLevel) << "checkmaster: " << c->toString() << ' ' << o << '\n'; + + string maybePrimary; + if ( o["hosts"].type() == Array ){ + if ( o["primary"].type() == String ) + maybePrimary = o["primary"].String(); + + BSONObjIterator hi(o["hosts"].Obj()); + while ( hi.more() ){ + string toCheck = hi.next().String(); + int found = -1; + for ( unsigned x=0; x<_servers.size(); x++ ){ + if ( toCheck == _servers[x].toString() ){ + found = x; + break; + } + } + + if ( found == -1 ){ + HostAndPort h( toCheck ); + _servers.push_back( h ); + _conns.push_back( new DBClientConnection( true, this ) ); + string temp; + _conns[ _conns.size() - 1 ]->connect( h , temp ); + log( _logLevel ) << "updated set to: " << toString() << endl; + } + + } + } + if ( im ) { - master = (State) (x + 2); + _currentMaster = c; return; } + + if ( maybePrimary.size() && ! 
triedQuickCheck ){ + for ( unsigned x=0; x<_servers.size(); x++ ){ + if ( _servers[i].toString() != maybePrimary ) + continue; + triedQuickCheck = true; + _conns[x]->isMaster( im , &o ); + if ( im ){ + _currentMaster = _conns[x]; + return; + } + } + } } - catch (AssertionException&) { + catch ( std::exception& e ) { if ( retry ) - log() << "checkmaster: caught exception " << c.toString() << '\n'; + log(_logLevel) << "checkmaster: caught exception " << c->toString() << ' ' << e.what() << endl; } - x = x^1; } sleepsecs(1); } @@ -915,36 +976,54 @@ uassert( 10009 , "checkmaster: no master found", false); } - inline DBClientConnection& DBClientPaired::checkMaster() { - if ( master > NotSetR ) { + DBClientConnection * DBClientReplicaSet::checkMaster() { + if ( _currentMaster ){ // a master is selected. let's just make sure connection didn't die - DBClientConnection& c = master == Left ? left : right; - if ( !c.isFailed() ) - return c; - // after a failure, on the next checkMaster, start with the other - // server -- presumably it took over. (not critical which we check first, - // just will make the failover slightly faster if we guess right) - master = master == Left ? NotSetR : NotSetL; + if ( ! _currentMaster->isFailed() ) + return _currentMaster; + _currentMaster = 0; } _checkMaster(); - assert( master > NotSetR ); - return master == Left ? left : right; + assert( _currentMaster ); + return _currentMaster; } - DBClientConnection& DBClientPaired::slaveConn(){ - DBClientConnection& m = checkMaster(); - assert( ! m.isFailed() ); - return master == Left ? right : left; + DBClientConnection& DBClientReplicaSet::masterConn(){ + return *checkMaster(); } - bool DBClientPaired::connect(const string &serverHostname1, const string &serverHostname2) { + DBClientConnection& DBClientReplicaSet::slaveConn(){ + DBClientConnection * m = checkMaster(); + assert( ! m->isFailed() ); + + DBClientConnection * failedSlave = 0; + + for ( unsigned i=0; i<_conns.size(); i++ ){ + if ( m == _conns[i] ) + continue; + failedSlave = _conns[i]; + if ( _conns[i]->isFailed() ) + continue; + return *_conns[i]; + } + + assert(failedSlave); + return *failedSlave; + } + + bool DBClientReplicaSet::connect(){ string errmsg; - bool l = left.connect(serverHostname1, errmsg); - bool r = right.connect(serverHostname2, errmsg); - master = l ? NotSetL : NotSetR; - if ( !l && !r ) // it would be ok to fall through, but checkMaster will then try an immediate reconnect which is slow + + bool anyGood = false; + for ( unsigned i=0; i<_conns.size(); i++ ){ + if ( _conns[i]->connect( _servers[i] , errmsg ) ) + anyGood = true; + } + + if ( ! anyGood ) return false; + try { checkMaster(); } @@ -954,61 +1033,56 @@ return true; } - bool DBClientPaired::connect(string hostpairstring) { - size_t comma = hostpairstring.find( "," ); - uassert( 10010 , "bad hostpairstring", comma != string::npos); - return connect( hostpairstring.substr( 0 , comma ) , hostpairstring.substr( comma + 1 ) ); - } - - bool DBClientPaired::auth(const string &dbname, const string &username, const string &pwd, string& errmsg) { - DBClientConnection& m = checkMaster(); - if( !m.auth(dbname, username, pwd, errmsg) ) + bool DBClientReplicaSet::auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword ) { + DBClientConnection * m = checkMaster(); + if( !m->auth(dbname, username, pwd, errmsg, digestPassword ) ) return false; + /* we try to authenticate with the other half of the pair -- even if down, that way the authInfo is cached.
*/ - string e; - try { - if( &m == &left ) - right.auth(dbname, username, pwd, e); - else - left.auth(dbname, username, pwd, e); - } - catch( AssertionException&) { - } + for ( unsigned i=0; i<_conns.size(); i++ ){ + if ( _conns[i] == m ) + continue; + try { + string e; + _conns[i]->auth( dbname , username , pwd , e , digestPassword ); + } + catch ( AssertionException& ){ + } + } + return true; } - auto_ptr DBClientPaired::query(const string &a, Query b, int c, int d, - const BSONObj *e, int f, int g) - { - return checkMaster().query(a,b,c,d,e,f,g); - } - - BSONObj DBClientPaired::findOne(const string &a, Query b, const BSONObj *c, int d) { - return checkMaster().findOne(a,b,c,d); - } - - void testPaired() { - DBClientPaired p; - log() << "connect returns " << p.connect("localhost:27017", "localhost:27018") << endl; - - //DBClientConnection p(true); - string errmsg; - // log() << "connect " << p.connect("localhost", errmsg) << endl; - log() << "auth " << p.auth("dwight", "u", "p", errmsg) << endl; - - while( 1 ) { - sleepsecs(3); - try { - log() << "findone returns " << p.findOne("dwight.foo", BSONObj()).toString() << endl; - sleepsecs(3); - BSONObj info; - bool im; - log() << "ismaster returns " << p.isMaster(im,&info) << " info: " << info.toString() << endl; - } - catch(...) { - cout << "caught exception" << endl; - } - } - } + auto_ptr DBClientReplicaSet::query(const string &a, Query b, int c, int d, + const BSONObj *e, int f, int g){ + // TODO: if slave ok is set go to a slave + return checkMaster()->query(a,b,c,d,e,f,g); + } + + BSONObj DBClientReplicaSet::findOne(const string &a, const Query& b, const BSONObj *c, int d) { + return checkMaster()->findOne(a,b,c,d); + } + bool DBClientReplicaSet::isMember( const DBConnector * conn ) const { + if ( conn == this ) + return true; + + for ( unsigned i=0; i<_conns.size(); i++ ) + if ( _conns[i]->isMember( conn ) ) + return true; + + return false; + } + + + bool serverAlive( const string &uri ) { + DBClientConnection c( false, 0, 20 ); // potentially the connection to server could fail while we're checking if it's alive - so use timeouts + string err; + if ( !c.connect( uri, err ) ) + return false; + if ( !c.simpleCommand( "admin", 0, "ping" ) ) + return false; + return true; + } + } // namespace mongo diff -Nru mongodb-1.4.4/client/dbclientcursor.cpp mongodb-1.6.3/client/dbclientcursor.cpp --- mongodb-1.4.4/client/dbclientcursor.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/dbclientcursor.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,232 @@ +// dbclient.cpp - connect to a Mongo database as a database, from C++ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "pch.h" +#include "dbclient.h" +#include "../db/dbmessage.h" +#include "../db/cmdline.h" +#include "connpool.h" +#include "../s/shard.h" + +namespace mongo { + + void assembleRequest( const string &ns, BSONObj query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, Message &toSend ); + + int DBClientCursor::nextBatchSize(){ + + if ( nToReturn == 0 ) + return batchSize; + + if ( batchSize == 0 ) + return nToReturn; + + return batchSize < nToReturn ? batchSize : nToReturn; + } + + bool DBClientCursor::init() { + Message toSend; + if ( !cursorId ) { + assembleRequest( ns, query, nextBatchSize() , nToSkip, fieldsToReturn, opts, toSend ); + } else { + BufBuilder b; + b.appendNum( opts ); + b.appendStr( ns ); + b.appendNum( nToReturn ); + b.appendNum( cursorId ); + toSend.setData( dbGetMore, b.buf(), b.len() ); + } + if ( !connector->call( toSend, *m, false ) ) + return false; + if ( m->empty() ) + return false; + dataReceived(); + return true; + } + + void DBClientCursor::requestMore() { + assert( cursorId && pos == nReturned ); + + if (haveLimit){ + nToReturn -= nReturned; + assert(nToReturn > 0); + } + BufBuilder b; + b.appendNum(opts); + b.appendStr(ns); + b.appendNum(nextBatchSize()); + b.appendNum(cursorId); + + Message toSend; + toSend.setData(dbGetMore, b.buf(), b.len()); + auto_ptr response(new Message()); + + if ( connector ){ + connector->call( toSend, *response ); + m = response; + dataReceived(); + } + else { + assert( _scopedHost.size() ); + ScopedDbConnection conn( _scopedHost ); + conn->call( toSend , *response ); + connector = conn.get(); + m = response; + dataReceived(); + connector = 0; + conn.done(); + } + } + + /** with QueryOption_Exhaust, the server just blasts data at us (marked at end with cursorid==0). */ + void DBClientCursor::exhaustReceiveMore() { + assert( cursorId && pos == nReturned ); + assert( !haveLimit ); + auto_ptr response(new Message()); + assert( connector ); + connector->recv(*response); + m = response; + dataReceived(); + } + + void DBClientCursor::dataReceived() { + QueryResult *qr = (QueryResult *) m->singleData(); + resultFlags = qr->resultFlags(); + + if ( qr->resultFlags() & ResultFlag_CursorNotFound ) { + // cursor id no longer valid at the server. + assert( qr->cursorId == 0 ); + cursorId = 0; // 0 indicates no longer valid (dead) + if ( ! ( opts & QueryOption_CursorTailable ) ) + throw UserException( 13127 , "getMore: cursor didn't exist on server, possible restart or timeout?" ); + } + + if ( cursorId == 0 || ! ( opts & QueryOption_CursorTailable ) ) { + // only set initially: we don't want to kill it on end of data + // if it's a tailable cursor + cursorId = qr->cursorId; + } + + nReturned = qr->nReturned; + pos = 0; + data = qr->data(); + + connector->checkResponse( data, nReturned ); + /* this assert would fire the way we currently work: + assert( nReturned || cursorId == 0 ); + */ + } + + /** If true, safe to call next(). Requests more from server if necessary. 
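A typical consumer drives more()/next() like this (illustrative sketch; assumes an established DBClientConnection named conn):

    auto_ptr<DBClientCursor> c = conn.query( "test.foo" , Query() );
    while ( c->more() ){              // issues a getMore to the server when the local batch is drained
        BSONObj obj = c->nextSafe();  // nextSafe() throws if the server returned { $err : ... }
        cout << obj.toString() << endl;
    }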
*/ + bool DBClientCursor::more() { + _assertIfNull(); + + if ( !_putBack.empty() ) + return true; + + if (haveLimit && pos >= nToReturn) + return false; + + if ( pos < nReturned ) + return true; + + if ( cursorId == 0 ) + return false; + + requestMore(); + return pos < nReturned; + } + + BSONObj DBClientCursor::next() { + DEV _assertIfNull(); + if ( !_putBack.empty() ) { + BSONObj ret = _putBack.top(); + _putBack.pop(); + return ret; + } + + uassert(13422, "DBClientCursor next() called but more() is false", pos < nReturned); + + pos++; + BSONObj o(data); + data += o.objsize(); + /* todo would be good to make data null at end of batch for safety */ + return o; + } + + void DBClientCursor::peek(vector& v, int atMost) { + int m = atMost; + + /* + for( stack::iterator i = _putBack.begin(); i != _putBack.end(); i++ ) { + if( m == 0 ) + return; + v.push_back(*i); + m--; + n++; + } + */ + + int p = pos; + const char *d = data; + while( m && p < nReturned ) { + BSONObj o(d); + d += o.objsize(); + p++; + m--; + v.push_back(o); + } + } + + void DBClientCursor::attach( AScopedConnection * conn ){ + assert( _scopedHost.size() == 0 ); + assert( conn->get()->isMember( connector ) ); + _scopedHost = conn->getHost(); + conn->done(); + connector = 0; + } + + DBClientCursor::~DBClientCursor() { + if (!this) + return; + + DESTRUCTOR_GUARD ( + + if ( cursorId && _ownCursor ) { + BufBuilder b; + b.appendNum( (int)0 ); // reserved + b.appendNum( (int)1 ); // number + b.appendNum( cursorId ); + + Message m; + m.setData( dbKillCursors , b.buf() , b.len() ); + + if ( connector ){ + connector->sayPiggyBack( m ); + } + else { + assert( _scopedHost.size() ); + ScopedDbConnection conn( _scopedHost ); + conn->sayPiggyBack( m ); + conn.done(); + } + } + + ); + } + + +} // namespace mongo diff -Nru mongodb-1.4.4/client/dbclientcursor.h mongodb-1.6.3/client/dbclientcursor.h --- mongodb-1.4.4/client/dbclientcursor.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/dbclientcursor.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,204 @@ +// file dbclientcursor.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../pch.h" +#include "../util/message.h" +#include "../db/jsobj.h" +#include "../db/json.h" +#include + +namespace mongo { + + class AScopedConnection; + + /** Queries return a cursor object */ + class DBClientCursor : boost::noncopyable { + public: + /** If true, safe to call next(). Requests more from server if necessary. */ + bool more(); + + /** If true, there is more in our local buffers to be fetched via next(). Returns + false when a getMore request back to server would be required. You can use this + if you want to exhaust whatever data has been fetched to the client already but + then perhaps stop. + */ + int objsLeftInBatch() const { _assertIfNull(); return _putBack.size() + nReturned - pos; } + bool moreInCurrentBatch() { return objsLeftInBatch() > 0; } + + /** next + @return next object in the result cursor. 
+ on an error at the remote server, you will get back: + { $err: <error string> } + if you do not want to handle that yourself, call nextSafe(). + */ + BSONObj next(); + + /** + restore an object previously returned by next() to the cursor + */ + void putBack( const BSONObj &o ) { _putBack.push( o.getOwned() ); } + + /** throws AssertionException if get back { $err : ... } */ + BSONObj nextSafe() { + BSONObj o = next(); + BSONElement e = o.firstElement(); + if( strcmp(e.fieldName(), "$err") == 0 ) { + if( logLevel >= 5 ) + log() << "nextSafe() error " << o.toString() << endl; + uassert(13106, "nextSafe(): " + o.toString(), false); + } + return o; + } + + /** peek ahead at items buffered for future next() calls. + never requests new data from the server, so peek is only effective + with what is already buffered. + WARNING: no support for _putBack yet! + */ + void peek(vector<BSONObj>&, int atMost); + + /** + iterate the rest of the cursor and return the number of items + */ + int itcount(){ + int c = 0; + while ( more() ){ + next(); + c++; + } + return c; + } + + /** cursor no longer valid -- use with tailable cursors. + note you should only rely on this once more() returns false; + 'dead' may be present yet some data still queued and locally + available from the dbclientcursor. + */ + bool isDead() const { + return !this || cursorId == 0; + } + + bool tailable() const { + return (opts & QueryOption_CursorTailable) != 0; + } + + /** see ResultFlagType (constants.h) for flag values + mostly these flags are for internal purposes - + ResultFlag_ErrSet is the possible exception to that + */ + bool hasResultFlag( int flag ){ + _assertIfNull(); + return (resultFlags & flag) != 0; + } + + DBClientCursor( DBConnector *_connector, const string &_ns, BSONObj _query, int _nToReturn, + int _nToSkip, const BSONObj *_fieldsToReturn, int queryOptions , int bs ) : + connector(_connector), + ns(_ns), + query(_query), + nToReturn(_nToReturn), + haveLimit( _nToReturn > 0 && !(queryOptions & QueryOption_CursorTailable)), + nToSkip(_nToSkip), + fieldsToReturn(_fieldsToReturn), + opts(queryOptions), + batchSize(bs==1?2:bs), + m(new Message()), + cursorId(), + nReturned(), + pos(), + data(), + _ownCursor( true ){ + } + + DBClientCursor( DBConnector *_connector, const string &_ns, long long _cursorId, int _nToReturn, int options ) : + connector(_connector), + ns(_ns), + nToReturn( _nToReturn ), + haveLimit( _nToReturn > 0 && !(options & QueryOption_CursorTailable)), + opts( options ), + m(new Message()), + cursorId( _cursorId ), + nReturned(), + pos(), + data(), + _ownCursor( true ){ + } + + virtual ~DBClientCursor(); + + long long getCursorId() const { return cursorId; } + + /** by default we "own" the cursor and will send the server a KillCursor + message when ~DBClientCursor() is called. This function overrides that. 
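For example, code that hands the server-side cursor off to another owner could do (sketch only; assumes conn is a live DBClientConnection):

    auto_ptr<DBClientCursor> c = conn.query( "test.foo" , Query() );
    long long id = c->getCursorId();
    c->decouple();                                         // ~DBClientCursor() will no longer send dbKillCursors
    DBClientCursor resumed( &conn , "test.foo" , id , 0 , 0 );  // resume via the cursorId constructor above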
+ */ + void decouple() { _ownCursor = false; } + + void attach( AScopedConnection * conn ); + + private: + friend class DBClientBase; + friend class DBClientConnection; + bool init(); + int nextBatchSize(); + DBConnector *connector; + string ns; + BSONObj query; + int nToReturn; + bool haveLimit; + int nToSkip; + const BSONObj *fieldsToReturn; + int opts; + int batchSize; + auto_ptr<Message> m; + stack< BSONObj > _putBack; + int resultFlags; + long long cursorId; + int nReturned; + int pos; + const char *data; + void dataReceived(); + void requestMore(); + void exhaustReceiveMore(); // for exhaust + bool _ownCursor; // see decouple() + string _scopedHost; + + // Don't call from a virtual function + void _assertIfNull() const { uassert(13348, "connection died", this); } + }; + + /** iterate over objects in current batch only - will not cause a network call + */ + class DBClientCursorBatchIterator { + public: + DBClientCursorBatchIterator( DBClientCursor &c ) : _c( c ), _n() {} + bool moreInCurrentBatch() { return _c.moreInCurrentBatch(); } + BSONObj nextSafe() { + massert( 13383, "BatchIterator empty", moreInCurrentBatch() ); + ++_n; + return _c.nextSafe(); + } + int n() const { return _n; } + private: + DBClientCursor &_c; + int _n; + }; + +} // namespace mongo + +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/dbclient.h mongodb-1.6.3/client/dbclient.h --- mongodb-1.4.4/client/dbclient.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/dbclient.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/message.h" #include "../db/jsobj.h" #include "../db/json.h" @@ -51,7 +51,7 @@ // an extended period of time. QueryOption_OplogReplay = 1 << 3, - /** The server normally times out idle cursors after an inactivy period to prevent excess memory use + /** The server normally times out idle cursors after an inactivity period to prevent excess memory use. Set this option to prevent that. */ QueryOption_NoCursorTimeout = 1 << 4, @@ -59,7 +59,18 @@ /** Use with QueryOption_CursorTailable. If we are at the end of the data, block for a while rather than returning no data. After a timeout period, we do return as normal. */ - QueryOption_AwaitData = 1 << 5 + QueryOption_AwaitData = 1 << 5, + + /** Stream the data down full blast in multiple "more" packages, on the assumption that the client + will fully read all data queried. Faster when you are pulling a lot of data and know you want to + pull it all down. Note: you may not stop reading the data partway through unless you close the connection. + + Use the query( boost::function<void(const BSONObj&)> f, ... ) version of the connection's query() + method, and it will take care of all the details for you. + */ + QueryOption_Exhaust = 1 << 6, + + QueryOption_AllSupported = QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | QueryOption_NoCursorTimeout | QueryOption_AwaitData | QueryOption_Exhaust }; @@ -69,10 +80,138 @@ /** Update multiple documents (if multiple documents match query expression). (Default is update a single document and stop.) 
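For instance, driven through the client interface, where the multi parameter maps to this flag (sketch; collection name is hypothetical):

    // increment visits on every matching document; upsert=false, multi=true
    conn.update( "test.people" , QUERY( "dept" << "eng" ) ,
                 BSON( "$inc" << BSON( "visits" << 1 ) ) , false , true );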
*/ - UpdateOption_Multi = 1 << 1 + UpdateOption_Multi = 1 << 1, + + /** flag from mongo saying this update went everywhere */ + UpdateOption_Broadcast = 1 << 2 + }; + + enum RemoveOptions { + /** only delete one document */ + RemoveOption_JustOne = 1 << 0, + + /** flag from mongo saying this update went everywhere */ + RemoveOption_Broadcast = 1 << 1 + }; + + class DBClientBase; + + class ConnectionString { + public: + enum ConnectionType { INVALID , MASTER , PAIR , SET , SYNC }; + + ConnectionString( const HostAndPort& server ){ + _type = MASTER; + _servers.push_back( server ); + _finishInit(); + } + + // TODO Delete if nobody is using + //ConnectionString( ConnectionType type , const vector<HostAndPort>& servers ) + // : _type( type ) , _servers( servers ){ + // _finishInit(); + //} + + ConnectionString( ConnectionType type , const string& s , const string& setName = "" ){ + _type = type; + _setName = setName; + _fillServers( s ); + + switch ( _type ){ + case MASTER: + assert( _servers.size() == 1 ); + break; + case SET: + assert( _setName.size() ); + assert( _servers.size() >= 1 ); // 1 is ok since we can derive + break; + case PAIR: + assert( _servers.size() == 2 ); + break; + default: + assert( _servers.size() > 0 ); + } + + _finishInit(); + } + + ConnectionString( const string& s , ConnectionType favoredMultipleType ){ + _fillServers( s ); + if ( _servers.size() == 1 ){ + _type = MASTER; + } + else { + _type = favoredMultipleType; + assert( _type != MASTER ); + } + _finishInit(); + } + + bool isValid() const { return _type != INVALID; } + + string toString() const { + return _string; + } + + DBClientBase* connect( string& errmsg ) const; + + static ConnectionString parse( const string& url , string& errmsg ); + + string getSetName() const{ + return _setName; + } + + vector<HostAndPort> getServers() const { + return _servers; + } + + private: + + ConnectionString(){ + _type = INVALID; + } + + void _fillServers( string s ){ + string::size_type idx; + while ( ( idx = s.find( ',' ) ) != string::npos ){ + _servers.push_back( s.substr( 0 , idx ) ); + s = s.substr( idx + 1 ); + } + _servers.push_back( s ); + } + + void _finishInit(){ + stringstream ss; + if ( _type == SET ) + ss << _setName << "/"; + for ( unsigned i=0; i<_servers.size(); i++ ){ + if ( i > 0 ) + ss << ","; + ss << _servers[i].toString(); + } + _string = ss.str(); + } + + ConnectionType _type; + vector<HostAndPort> _servers; + string _string; + string _setName; + }; + + /** + * controls how much a client cares about writes + * default is NORMAL + */ + enum WriteConcern { + W_NONE = 0 , // TODO: not every connection type fully supports this + W_NORMAL = 1 + // TODO SAFE = 2 }; class BSONObj; + class ScopedDbConnection; + class DBClientCursor; + class DBClientCursorBatchIterator; /** Represents a Mongo query expression. Typically one uses the QUERY(...) macro to construct a Query object. Examples: @@ -160,7 +299,7 @@ /** * if this query has an orderby, hint, or some other field */ - bool isComplex() const; + bool isComplex( bool * hasDollar = 0 ) const; BSONObj getFilter() const; BSONObj getSort() const; @@ -184,7 +323,7 @@ /** Typically one uses the QUERY(...) macro to construct a Query object. 
Example: QUERY( "age" << 33 << "school" << "UCLA" ) */ -#define QUERY(x) Query( BSON(x) ) +#define QUERY(x) mongo::Query( BSON(x) ) /** interface that handles communication with the db @@ -195,146 +334,14 @@ virtual bool call( Message &toSend, Message &response, bool assertOk=true ) = 0; virtual void say( Message &toSend ) = 0; virtual void sayPiggyBack( Message &toSend ) = 0; - virtual void checkResponse( const string &data, int nReturned ) {} - }; - - /** Queries return a cursor object */ - class DBClientCursor : boost::noncopyable { - friend class DBClientBase; - bool init(); - public: - /** If true, safe to call next(). Requests more from server if necessary. */ - bool more(); - - /** If true, there is more in our local buffers to be fetched via next(). Returns - false when a getMore request back to server would be required. You can use this - if you want to exhaust whatever data has been fetched to the client already but - then perhaps stop. - */ - bool moreInCurrentBatch() { return !_putBack.empty() || pos < nReturned; } + virtual void checkResponse( const char* data, int nReturned ) {} - /** next - @return next object in the result cursor. - on an error at the remote server, you will get back: - { $err: } - if you do not want to handle that yourself, call nextSafe(). - */ - BSONObj next(); - - /** - restore an object previously returned by next() to the cursor - */ - void putBack( const BSONObj &o ) { _putBack.push( o.getOwned() ); } - - /** throws AssertionException if get back { $err : ... } */ - BSONObj nextSafe() { - BSONObj o = next(); - BSONElement e = o.firstElement(); - assert( strcmp(e.fieldName(), "$err") != 0 ); - return o; - } - - /** - iterate the rest of the cursor and return the number if items - */ - int itcount(){ - int c = 0; - while ( more() ){ - next(); - c++; - } - return c; - } - - /** cursor no longer valid -- use with tailable cursors. - note you should only rely on this once more() returns false; - 'dead' may be preset yet some data still queued and locally - available from the dbclientcursor. 
- */ - bool isDead() const { - return cursorId == 0; - } - - bool tailable() const { - return (opts & QueryOption_CursorTailable) != 0; - } - - /** see QueryResult::ResultFlagType (db/dbmessage.h) for flag values - mostly these flags are for internal purposes - - ResultFlag_ErrSet is the possible exception to that - */ - bool hasResultFlag( int flag ){ - return (resultFlags & flag) != 0; - } - - DBClientCursor( DBConnector *_connector, const string &_ns, BSONObj _query, int _nToReturn, - int _nToSkip, const BSONObj *_fieldsToReturn, int queryOptions , int bs ) : - connector(_connector), - ns(_ns), - query(_query), - nToReturn(_nToReturn), - haveLimit( _nToReturn > 0 && !(queryOptions & QueryOption_CursorTailable)), - nToSkip(_nToSkip), - fieldsToReturn(_fieldsToReturn), - opts(queryOptions), - batchSize(bs), - m(new Message()), - cursorId(), - nReturned(), - pos(), - data(), - _ownCursor( true ) { - } - - DBClientCursor( DBConnector *_connector, const string &_ns, long long _cursorId, int _nToReturn, int options ) : - connector(_connector), - ns(_ns), - nToReturn( _nToReturn ), - haveLimit( _nToReturn > 0 && !(options & QueryOption_CursorTailable)), - opts( options ), - m(new Message()), - cursorId( _cursorId ), - nReturned(), - pos(), - data(), - _ownCursor( true ) { - } - - virtual ~DBClientCursor(); - - long long getCursorId() const { return cursorId; } - - /** by default we "own" the cursor and will send the server a KillCursor - message when ~DBClientCursor() is called. This function overrides that. - */ - void decouple() { _ownCursor = false; } - - private: - - int nextBatchSize(); + /* used by QueryOption_Exhaust. To use that your subclass must implement this. */ + virtual void recv( Message& m ) { assert(false); } - DBConnector *connector; - string ns; - BSONObj query; - int nToReturn; - bool haveLimit; - int nToSkip; - const BSONObj *fieldsToReturn; - int opts; - int batchSize; - auto_ptr m; - stack< BSONObj > _putBack; - - int resultFlags; - long long cursorId; - int nReturned; - int pos; - const char *data; - void dataReceived(); - void requestMore(); - bool _ownCursor; // see decouple() + virtual string getServerAddress() const = 0; }; - + /** The interface that any db connection should implement */ @@ -343,6 +350,7 @@ virtual auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) = 0; + /** don't use this - called automatically by DBClientCursor for you */ virtual auto_ptr getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ) = 0; virtual void insert( const string &ns, BSONObj obj ) = 0; @@ -359,7 +367,7 @@ @return a single object that matches the query. if none do, then the object is empty @throws AssertionException */ - virtual BSONObj findOne(const string &ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); }; @@ -371,33 +379,38 @@ class DBClientWithCommands : public DBClientInterface { set _seenIndexes; public: + /** controls how chatty the client is about network errors & such. See log.h */ + int _logLevel; + + DBClientWithCommands() : _logLevel(0), _cachedAvailableOptions( (enum QueryOptions)0 ), _haveCachedAvailableOptions(false) { } - /** helper function. run a simple command where the command expression is simply - { command : 1 } + /** helper function. 
run a simple command where the command expression is simply + { command : 1 } @param info -- where to put result object. may be null if caller doesn't need that info @param command -- command name - @return true if the command returned "ok". - */ + @return true if the command returned "ok". + */ bool simpleCommand(const string &dbname, BSONObj *info, const string &command); /** Run a database command. Database commands are represented as BSON objects. Common database commands have prebuilt helper functions -- see below. If a helper is not available you can - directly call runCommand. + directly call runCommand. @param dbname database name. Use "admin" for global administrative commands. @param cmd the command object to execute. For example, { ismaster : 1 } @param info the result object the database returns. Typically has { ok : ..., errmsg : ... } fields set. - @return true if the command returned "ok". + @param options see enum QueryOptions - normally not needed to run a command + @return true if the command returned "ok". */ virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); /** Authorize access to a particular database. - Authentication is separate for each database on the server -- you may authenticate for any - number of databases on a single connection. - The "admin" database is special and once authenticated provides access to all databases on the - server. - @param digestPassword if password is plain text, set this to true. otherwise assumed to be pre-digested + Authentication is separate for each database on the server -- you may authenticate for any + number of databases on a single connection. + The "admin" database is special and once authenticated provides access to all databases on the + server. + @param digestPassword if password is plain text, set this to true. otherwise assumed to be pre-digested @return true if successful */ virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true); @@ -425,17 +438,17 @@ If the collection already exists, no action occurs. - ns: fully qualified collection name - size: desired initial extent size for the collection. - Must be <= 1000000000 for normal collections. - For fixed size (capped) collections, this size is the total/max size of the - collection. - capped: if true, this is a fixed size collection (where old data rolls out). - max: maximum number of objects if capped (optional). + @param ns fully qualified collection name + @param size desired initial extent size for the collection. + Must be <= 1000000000 for normal collections. + For fixed size (capped) collections, this size is the total/max size of the + collection. + @param capped if true, this is a fixed size collection (where old data rolls out). + @param max maximum number of objects if capped (optional). returns true if successful. */ - bool createCollection(const string &ns, unsigned size = 0, bool capped = false, int max = 0, BSONObj *info = 0); + bool createCollection(const string &ns, long long size = 0, bool capped = false, int max = 0, BSONObj *info = 0); /** Get error result from the last operation on this connection. @return error message text, or empty string if no error. @@ -444,7 +457,9 @@ /** Get error result from the last operation on this connection. @return full error object. 
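For example (sketch; assumes the usual getlasterror reply shape with err and code fields):

    conn.insert( "test.foo" , BSON( "_id" << 1 ) );
    conn.insert( "test.foo" , BSON( "_id" << 1 ) );      // duplicate _id
    BSONObj e = conn.getLastErrorDetailed();
    if ( ! e["err"].isNull() )
        cout << "write failed: " << e["err"].String()
             << " code: " << e["code"].numberInt() << endl;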
*/ - BSONObj getLastErrorDetailed(); + virtual BSONObj getLastErrorDetailed(); + + static string getLastErrorString( const BSONObj& res ); /** Return the last error which has occurred, even if not the very last operation. @@ -595,6 +610,8 @@ /** get a list of all the current databases + uses the { listDatabases : 1 } command. + throws on error */ list getDatabaseNames(); @@ -605,7 +622,6 @@ bool exists( const string& ns ); - /** Create an index if it does not already exist. ensureIndex calls are remembered so it is safe/fast to call this function many times in your code. @@ -666,25 +682,39 @@ protected: bool isOk(const BSONObj&); - + + enum QueryOptions availableOptions(); + + private: + enum QueryOptions _cachedAvailableOptions; + bool _haveCachedAvailableOptions; }; /** abstract class that implements the core db operations */ class DBClientBase : public DBClientWithCommands, public DBConnector { + protected: + WriteConcern _writeConcern; + public: + DBClientBase(){ + _writeConcern = W_NORMAL; + } + + WriteConcern getWriteConcern() const { return _writeConcern; } + void setWriteConcern( WriteConcern w ){ _writeConcern = w; } + /** send a query to the database. - ns: namespace to query, format is .[.]* - query: query to perform on the collection. this is a BSONObj (binary JSON) + @param ns namespace to query, format is .[.]* + @param query query to perform on the collection. this is a BSONObj (binary JSON) You may format as { query: { ... }, orderby: { ... } } to specify a sort order. - nToReturn: n to return. 0 = unlimited - nToSkip: start with the nth item - fieldsToReturn: - optional template of which fields to select. if unspecified, returns all fields - queryOptions: see options enum at top of this file + @param nToReturn n to return. 0 = unlimited + @param nToSkip start with the nth item + @param fieldsToReturn optional template of which fields to select. if unspecified, returns all fields + @param queryOptions see options enum at top of this file @return cursor. 
0 if error (connection failure) @throws AssertionException @@ -692,12 +722,13 @@ virtual auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ); - /** @param cursorId id of cursor to retrieve + /** don't use this - called automatically by DBClientCursor for you + @param cursorId id of cursor to retrieve @return an handle to a previously allocated cursor @throws AssertionException */ virtual auto_ptr getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ); - + /** insert an object into the database */ @@ -717,11 +748,11 @@ /** updates objects matching query */ - virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ); - - virtual string getServerAddress() const = 0; + virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = false , bool multi = false ); virtual bool isFailed() const = 0; + + virtual void killCursor( long long cursorID ) = 0; static int countCommas( const string& s ){ int n = 0; @@ -730,9 +761,18 @@ n++; return n; } - }; + + virtual bool callRead( Message& toSend , Message& response ) = 0; + // virtual bool callWrite( Message& toSend , Message& response ) = 0; // TODO: add this if needed + virtual void say( Message& toSend ) = 0; + + virtual ConnectionString::ConnectionType type() const = 0; + + /** @return true if conn is either equal to or contained in this connection */ + virtual bool isMember( const DBConnector * conn ) const = 0; + }; // DBClientBase - class DBClientPaired; + class DBClientReplicaSet; class ConnectException : public UserException { public: @@ -744,24 +784,31 @@ This is the main entry point for talking to a simple Mongo setup */ class DBClientConnection : public DBClientBase { - DBClientPaired *clientPaired; - auto_ptr p; - auto_ptr server; + DBClientReplicaSet *clientSet; + boost::scoped_ptr p; + boost::scoped_ptr server; bool failed; // true if some sort of fatal error has ever happened bool autoReconnect; time_t lastReconnectTry; - string serverAddress; // remember for reconnects + HostAndPort _server; // remember for reconnects + string _serverString; + int _port; void _checkConnection(); void checkConnection() { if( failed ) _checkConnection(); } map< string, pair > authCache; + int _timeout; + + bool _connect( string& errmsg ); public: /** @param _autoReconnect if true, automatically reconnect on a connection failure - @param cp used by DBClientPaired. You do not need to specify this parameter + @param cp used by DBClientReplicaSet. You do not need to specify this parameter + @param timeout tcp timeout in seconds - this is for read/write, not connect. + Connect timeout is fixed, but short, at 5 seconds. */ - DBClientConnection(bool _autoReconnect=false,DBClientPaired* cp=0) : - clientPaired(cp), failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0) { } + DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, int timeout=0) : + clientSet(cp), failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0), _timeout(timeout) { } /** Connect to a Mongo database server. @@ -769,10 +816,27 @@ false was returned -- it will try to connect again. @param serverHostname host to connect to. 
can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) + If you use IPv6 you must add a port number ( ::1:27017 ) + @param errmsg any relevant error message will be appended to the string + @deprecated please use HostAndPort + @return false if fails to connect. + */ + virtual bool connect(const char * hostname, string& errmsg){ + // TODO: remove this method + HostAndPort t( hostname ); + return connect( t , errmsg ); + } + + /** Connect to a Mongo database server. + + If autoReconnect is true, you can try to use the DBClientConnection even when + false was returned -- it will try to connect again. + + @param server server to connect to. @param errmsg any relevant error message will be appended to the string @return false if fails to connect. */ - virtual bool connect(const string &serverHostname, string& errmsg); + virtual bool connect(const HostAndPort& server, string& errmsg); /** Connect to a Mongo database server. Exception throwing version. Throws a UserException if cannot connect. @@ -782,20 +846,26 @@ @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) */ - void connect(string serverHostname) { + void connect(const string& serverHostname) { string errmsg; - if( !connect(serverHostname.c_str(), errmsg) ) + if( !connect(HostAndPort(serverHostname), errmsg) ) throw ConnectException(string("can't connect ") + errmsg); } virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true); - virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, + virtual auto_ptr<DBClientCursor> query(const string &ns, Query query=Query(), int nToReturn = 0, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) { checkConnection(); return DBClientBase::query( ns, query, nToReturn, nToSkip, fieldsToReturn, queryOptions , batchSize ); } + /** uses QueryOption_Exhaust + use DBClientCursorBatchIterator if you want to do items in large blocks, perhaps to avoid granular locking and such. + */ + unsigned long long query( boost::function<void(const BSONObj&)> f, const string& ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + unsigned long long query( boost::function<void(DBClientCursorBatchIterator&)> f, const string& ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + /** @return true if this connection is currently in a failed state. When autoreconnect is on, a connection will transition back to an ok state after reconnecting. 
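A sketch of the exhaust-mode overloads above (the callback type follows the boost::function<void(const BSONObj&)> form; namespace and host are hypothetical):

    static void handle( const BSONObj& obj ) { cout << obj << endl; }

    DBClientConnection conn;
    conn.connect( "localhost" );     // throwing version; ConnectException on failure
    unsigned long long n = conn.query( handle , "test.bigcoll" , Query() );
    cout << "streamed " << n << " documents" << endl;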
@@ -805,67 +875,80 @@ } MessagingPort& port() { - return *p.get(); + return *p; } string toStringLong() const { stringstream ss; - ss << serverAddress; + ss << _serverString; if ( failed ) ss << " failed"; return ss.str(); } /** Returns the address of the server */ string toString() { - return serverAddress; + return _serverString; } string getServerAddress() const { - return serverAddress; + return _serverString; + } + + virtual void killCursor( long long cursorID ); + + virtual bool callRead( Message& toSend , Message& response ){ + return call( toSend , response ); } - virtual bool call( Message &toSend, Message &response, bool assertOk = true ); virtual void say( Message &toSend ); - virtual void sayPiggyBack( Message &toSend ); + virtual bool call( Message &toSend, Message &response, bool assertOk = true ); + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } + + virtual bool isMember( const DBConnector * conn ) const { return this == conn; } + virtual void checkResponse( const char *data, int nReturned ); - }; - /** Use this class to connect to a replica pair of servers. The class will manage - checking for which server in a replica pair is master, and do failover automatically. + protected: + friend class SyncClusterConnection; + virtual void recv( Message& m ); + virtual void sayPiggyBack( Message &toSend ); + }; + + /** Use this class to connect to a replica set of servers. The class will manage + checking for which server in a replica set is master, and do failover automatically. + + This can also be used to connect to replica pairs, since pairs are a subset of sets. + In a failover situation, expect at least one operation to return an error (throw an exception) before the failover is complete. Operations are not retried. */ - class DBClientPaired : public DBClientBase { - DBClientConnection left,right; - enum State { - NotSetL=0, - NotSetR=1, - Left, Right - } master; + class DBClientReplicaSet : public DBClientBase { + string _name; + DBClientConnection * _currentMaster; + vector<HostAndPort> _servers; + vector<DBClientConnection*> _conns; + void _checkMaster(); - DBClientConnection& checkMaster(); + DBClientConnection * checkMaster(); public: - /** Call connect() after constructing. autoReconnect is always on for DBClientPaired connections. */ - DBClientPaired(); + /** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */ + DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers ); + virtual ~DBClientReplicaSet(); - /** Returns false is neither member of the pair were reachable, or neither is + /** Returns false if no member of the set was reachable, or none is master; although, when false is returned, you can still try to use this connection object, it will try reconnects. */ - bool connect(const string &serverHostname1, const string &serverHostname2); + bool connect(); - /** Connect to a server pair using a host pair string of the form - hostname[:port],hostname[:port] - */ - bool connect(string hostpairstring); - - /** Authorize. Authorizes both sides of the pair as needed. + /** Authorize. 
Authorizes all nodes as needed */ - bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg); + virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true ); /** throws userassertion "no master found" */ virtual @@ -874,56 +957,72 @@ /** throws userassertion "no master found" */ virtual - BSONObj findOne(const string &ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); /** insert */ virtual void insert( const string &ns , BSONObj obj ) { - checkMaster().insert(ns, obj); + checkMaster()->insert(ns, obj); } /** insert multiple objects. Note that single object insert is asynchronous, so this version is only nominally faster and not worth a special effort to try to use. */ virtual void insert( const string &ns, const vector< BSONObj >& v ) { - checkMaster().insert(ns, v); + checkMaster()->insert(ns, v); } /** remove */ virtual void remove( const string &ns , Query obj , bool justOne = 0 ) { - checkMaster().remove(ns, obj, justOne); + checkMaster()->remove(ns, obj, justOne); } /** update */ virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ) { - return checkMaster().update(ns, query, obj, upsert,multi); + return checkMaster()->update(ns, query, obj, upsert,multi); } + virtual void killCursor( long long cursorID ){ + checkMaster()->killCursor( cursorID ); + } + string toString(); /* this is the callback from our underlying connections to notify us that we got a "not master" error. */ void isntMaster() { - master = ( ( master == Left ) ? NotSetR : NotSetL ); + _currentMaster = 0; } - string getServerAddress() const { - return left.getServerAddress() + "," + right.getServerAddress(); - } - + string getServerAddress() const; + + DBClientConnection& masterConn(); DBClientConnection& slaveConn(); - /* TODO - not yet implemented. mongos may need these. 
*/ - virtual bool call( Message &toSend, Message &response, bool assertOk=true ) { assert(false); return false; } - virtual void say( Message &toSend ) { assert(false); } - virtual void sayPiggyBack( Message &toSend ) { assert(false); } - virtual void checkResponse( const char *data, int nReturned ) { assert(false); } + + virtual bool call( Message &toSend, Message &response, bool assertOk=true ) { return checkMaster()->call( toSend , response , assertOk ); } + virtual void say( Message &toSend ) { checkMaster()->say( toSend ); } + virtual bool callRead( Message& toSend , Message& response ){ return checkMaster()->callRead( toSend , response ); } + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; } + + virtual bool isMember( const DBConnector * conn ) const; + + virtual void checkResponse( const char *data, int nReturned ) { checkMaster()->checkResponse( data , nReturned ); } + + protected: + virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); } bool isFailed() const { - // TODO: this really should check isFailed on current master as well - return master < Left; + return _currentMaster == 0 || _currentMaster->isFailed(); } }; + /** pings server to check if it's up + */ + bool serverAlive( const string &uri ); DBClientBase * createDirectClient(); } // namespace mongo + +#include "dbclientcursor.h" +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/distlock.cpp mongodb-1.6.3/client/distlock.cpp --- mongodb-1.4.4/client/distlock.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/distlock.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,239 @@ +// @file distlock.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" +#include "dbclient.h" +#include "distlock.h" + +namespace mongo { + + string lockPingNS = "config.lockpings"; + + ThreadLocalValue distLockIds(""); + + string getDistLockProcess(){ + static string s; + if ( s.empty() ){ + stringstream ss; + ss << getHostNameCached() << ":" << time(0) << ":" << rand(); + s = ss.str(); + } + return s; + } + + string getDistLockId(){ + string s = distLockIds.get(); + if ( s.empty() ){ + stringstream ss; + ss << getDistLockProcess() << ":" << getThreadName() << ":" << rand(); + s = ss.str(); + distLockIds.set( s ); + } + return s; + } + + void distLockPingThread( ConnectionString addr ){ + setThreadName( "LockPinger" ); + + static int loops = 0; + while( ! 
inShutdown() ){ + try { + ScopedDbConnection conn( addr ); + + // do ping + conn->update( lockPingNS , + BSON( "_id" << getDistLockProcess() ) , + BSON( "$set" << BSON( "ping" << DATENOW ) ) , + true ); + + + // remove really old entries + BSONObjBuilder f; + f.appendDate( "$lt" , jsTime() - ( 4 * 86400 * 1000 ) ); + BSONObj r = BSON( "ping" << f.obj() ); + conn->remove( lockPingNS , r ); + + // create index so remove is fast even with a lot of servers + if ( loops++ == 0 ){ + conn->ensureIndex( lockPingNS , BSON( "ping" << 1 ) ); + } + + conn.done(); + } + catch ( std::exception& e ){ + log( LL_WARNING ) << "couldn't ping: " << e.what() << endl; + } + sleepsecs(30); + } + } + + + class DistributedLockPinger { + public: + DistributedLockPinger() + : _mutex( "DistributedLockPinger" ){ + } + + void got( const ConnectionString& conn ){ + string s = conn.toString(); + scoped_lock lk( _mutex ); + if ( _seen.count( s ) > 0 ) + return; + boost::thread t( boost::bind( &distLockPingThread , conn ) ); + _seen.insert( s ); + } + + set _seen; + mongo::mutex _mutex; + + } distLockPinger; + + DistributedLock::DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes ) + : _conn(conn),_name(name),_takeoverMinutes(takeoverMinutes){ + _id = BSON( "_id" << name ); + _ns = "config.locks"; + distLockPinger.got( conn ); + } + + + bool DistributedLock::lock_try( string why , BSONObj * other ){ + ScopedDbConnection conn( _conn ); + + BSONObjBuilder queryBuilder; + queryBuilder.appendElements( _id ); + queryBuilder.append( "state" , 0 ); + + { // make sure its there so we can use simple update logic below + BSONObj o = conn->findOne( _ns , _id ); + if ( o.isEmpty() ){ + try { + conn->insert( _ns , BSON( "_id" << _name << "state" << 0 << "who" << "" ) ); + } + catch ( UserException& ){ + } + } + else if ( o["state"].numberInt() > 0 ){ + BSONObj lastPing = conn->findOne( lockPingNS , o["process"].wrap( "_id" ) ); + if ( lastPing.isEmpty() ){ + // TODO: maybe this should clear, not sure yet + log() << "lastPing is empty! 
this could be bad: " << o << endl; + conn.done(); + return false; + } + + unsigned long long elapsed = jsTime() - lastPing["ping"].Date(); // in ms + elapsed = elapsed / ( 1000 * 60 ); // convert to minutes + + if ( elapsed <= _takeoverMinutes ){ + log(1) << "dist_lock lock failed because taken by: " << o << endl; + conn.done(); + return false; + } + + log() << "dist_lock forcefully taking over from: " << o << " elapsed minutes: " << elapsed << endl; + conn->update( _ns , _id , BSON( "$set" << BSON( "state" << 0 ) ) ); + } + else if ( o["ts"].type() ){ + queryBuilder.append( o["ts"] ); + } + } + + OID ts; + ts.init(); + + bool gotLock = false; + BSONObj now; + + BSONObj whatIWant = BSON( "$set" << BSON( "state" << 1 << + "who" << getDistLockId() << "process" << getDistLockProcess() << + "when" << DATENOW << "why" << why << "ts" << ts ) ); + try { + conn->update( _ns , queryBuilder.obj() , whatIWant ); + + BSONObj o = conn->getLastErrorDetailed(); + now = conn->findOne( _ns , _id ); + + if ( o["n"].numberInt() == 0 ){ + if ( other ) + *other = now; + gotLock = false; + } + else { + gotLock = true; + } + + } + catch ( UpdateNotTheSame& up ){ + // this means our update got through on some, but not others + + for ( unsigned i=0; i<up.size(); i++ ){ + ScopedDbConnection temp( up[i].first ); + BSONObj temp2 = temp->findOne( _ns , _id ); + + if ( now.isEmpty() || now["ts"] < temp2["ts"] ){ + now = temp2.getOwned(); + } + + temp.done(); + } + + if ( now["ts"].OID() == ts ){ + gotLock = true; + conn->update( _ns , _id , whatIWant ); + } + else { + gotLock = false; + } + } + + conn.done(); + + log(1) << "dist_lock lock gotLock: " << gotLock << " now: " << now << endl; + + if ( ! gotLock ) + return false; + + return true; + } + + void DistributedLock::unlock(){ + const int maxAttempts = 3; + int attempted = 0; + while ( ++attempted <= maxAttempts ) { + + try { + ScopedDbConnection conn( _conn ); + conn->update( _ns , _id, BSON( "$set" << BSON( "state" << 0 ) ) ); + log(1) << "dist_lock unlock: " << conn->findOne( _ns , _id ) << endl; + conn.done(); + + return; + + + } catch ( std::exception& e) { + log( LL_WARNING ) << "dist_lock " << _name << " failed to contact config server in unlock attempt " + << attempted << ": " << e.what() << endl; + + sleepsecs(1 << attempted); + } + } + + log( LL_WARNING ) << "dist_lock couldn't consummate unlock request. " << "Lock " << _name + << " will be taken over after " << _takeoverMinutes << " minutes timeout" << endl; + } + +} diff -Nru mongodb-1.4.4/client/distlock.h mongodb-1.6.3/client/distlock.h --- mongodb-1.4.4/client/distlock.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/distlock.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,81 @@ +// distlock.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +/** + * distributed locking mechanism + */ + +#include "../pch.h" +#include "dbclient.h" +#include "connpool.h" +#include "redef_macros.h" +#include "syncclusterconnection.h" + +namespace mongo { + + class DistributedLock { + public: + + /** + * @param takeoverMinutes how long before we steal lock in minutes + */ + DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes = 10 ); + + bool lock_try( string why , BSONObj * other = 0 ); + void unlock(); + + private: + ConnectionString _conn; + string _name; + unsigned _takeoverMinutes; + + string _ns; + BSONObj _id; + }; + + class dist_lock_try { + public: + + dist_lock_try( DistributedLock * lock , string why ) + : _lock(lock){ + _got = _lock->lock_try( why , &_other ); + } + + ~dist_lock_try(){ + if ( _got ){ + _lock->unlock(); + } + } + + bool got() const { + return _got; + } + + BSONObj other() const { + return _other; + } + + private: + DistributedLock * _lock; + bool _got; + BSONObj _other; + + }; + +} + diff -Nru mongodb-1.4.4/client/distlock_test.cpp mongodb-1.6.3/client/distlock_test.cpp --- mongodb-1.4.4/client/distlock_test.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/distlock_test.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,80 @@ +// distlock_test.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../pch.h" +#include "dbclient.h" +#include "distlock.h" +#include "../db/commands.h" + +namespace mongo { + + class TestDistLockWithSync : public Command { + public: + TestDistLockWithSync() : Command( "_testDistLockWithSyncCluster" ){} + virtual void help( stringstream& help ) const { + help << "should not be calling this directly" << endl; + } + + virtual bool slaveOk() const { return false; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + + static void runThread(){ + for ( int i=0; i<1000; i++ ){ + if ( current->lock_try( "test" ) ){ + gotit++; + for ( int j=0; j<2000; j++ ){ + count++; + } + current->unlock(); + } + } + } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + DistributedLock lk( ConnectionString( cmdObj["host"].String() , ConnectionString::SYNC ), "testdistlockwithsync" ); + current = &lk; + count = 0; + gotit = 0; + + vector< shared_ptr<boost::thread> > l; + for ( int i=0; i<4; i++ ){ + l.push_back( shared_ptr<boost::thread>( new boost::thread( runThread ) ) ); + } + + for ( unsigned i=0; i<l.size(); i++ ) + l[i]->join(); + + result.append( "count" , count ); + result.append( "gotit" , gotit ); + current = 0; + return count == gotit * 2000; + } + + static DistributedLock * current; + static int count; + static int gotit; + + } testDistLockWithSyncCmd; + + + DistributedLock * TestDistLockWithSync::current; + int TestDistLockWithSync::count; + int TestDistLockWithSync::gotit; + + +} diff -Nru mongodb-1.4.4/client/examples/clientTest.cpp mongodb-1.6.3/client/examples/clientTest.cpp --- mongodb-1.4.4/client/examples/clientTest.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/examples/clientTest.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -137,10 +137,14 @@ assert( conn.getLastError() == "" ); // nonexistent index test - assert( conn.findOne(ns, Query("{name:\"eliot\"}").hint("{foo:1}")).hasElement("$err") ); - assert( conn.getLastError() == "bad hint" ); - conn.resetError(); - assert( conn.getLastError() == "" ); + bool asserted = false; + try { + conn.findOne(ns, Query("{name:\"eliot\"}").hint("{foo:1}")); + } + catch ( ... ){ + asserted = true; + } + assert( asserted ); //existing index assert( conn.findOne(ns, Query("{name:'eliot'}").hint("{name:1}")).hasElement("name") ); @@ -176,8 +180,9 @@ } BSONObj found = conn.findOne( tsns , mongo::BSONObj() ); + cout << "old: " << out << "\nnew: " << found << endl; assert( ( oldTime < found["ts"].timestampTime() ) || - ( oldInc + 1 == found["ts"].timestampInc() ) ); + ( oldTime == found["ts"].timestampTime() && oldInc < found["ts"].timestampInc() ) ); } @@ -185,9 +190,9 @@ assert( conn.getLastError().empty() ); BufBuilder b; - b.append( (int)0 ); // reserved - b.append( (int)-1 ); // invalid # of cursors triggers exception - b.append( (int)-1 ); // bogus cursor id + b.appendNum( (int)0 ); // reserved + b.appendNum( (int)-1 ); // invalid # of cursors triggers exception + b.appendNum( (int)-1 ); // bogus cursor id Message m; m.setData( dbKillCursors, b.buf(), b.len() ); diff -Nru mongodb-1.4.4/client/examples/tail.cpp mongodb-1.6.3/client/examples/tail.cpp --- mongodb-1.4.4/client/examples/tail.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/examples/tail.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -22,34 +22,25 @@ using namespace mongo; -void foo() { } +void tail(DBClientBase& conn, const char *ns) { + BSONElement lastId = minKey.firstElement(); + Query query = Query(); -/* "tail" the specified namespace, outputting elements as they are added. 
- _id values must be inserted in increasing order for this to work. (Some other - field could also be used.) + auto_ptr c = + conn.query(ns, query, 0, 0, 0, QueryOption_CursorTailable); - Note: one could use a capped collection and $natural order to do something - similar, using sort({$natural:1}), and then not need to worry about - _id's being in order. -*/ -void tail(DBClientBase& conn, const char *ns) { - conn.ensureIndex(ns, fromjson("{_id:1}")); - BSONElement lastId; - Query query = Query().sort("_id"); while( 1 ) { - auto_ptr c = conn.query(ns, query, 0, 0, 0, Option_CursorTailable); - while( 1 ) { - if( !c->more() ) { - if( c->isDead() ) { - // we need to requery - break; - } - sleepsecs(1); + if( !c->more() ) { + if( c->isDead() ) { + break; // we need to requery + } + + // all data (so far) exhausted, wait for more + sleepsecs(1); + continue; } BSONObj o = c->next(); lastId = o["_id"]; cout << o.toString() << endl; - } - query = QUERY( "_id" << GT << lastId ).sort("_id"); } } diff -Nru mongodb-1.4.4/client/gridfs.cpp mongodb-1.6.3/client/gridfs.cpp --- mongodb-1.4.4/client/gridfs.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/gridfs.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "../stdafx.h" +#include "pch.h" #include #include @@ -34,15 +34,15 @@ const unsigned DEFAULT_CHUNK_SIZE = 256 * 1024; - Chunk::Chunk( BSONObj o ){ + GridFSChunk::GridFSChunk( BSONObj o ){ _data = o; } - Chunk::Chunk( BSONObj fileObject , int chunkNumber , const char * data , int len ){ + GridFSChunk::GridFSChunk( BSONObj fileObject , int chunkNumber , const char * data , int len ){ BSONObjBuilder b; b.appendAs( fileObject["_id"] , "files_id" ); b.append( "n" , chunkNumber ); - b.appendBinDataArray( "data" , data , len ); + b.appendBinData( "data" , len, BinDataGeneral, data ); _data = b.obj(); } @@ -50,7 +50,7 @@ GridFS::GridFS( DBClientBase& client , const string& dbName , const string& prefix ) : _client( client ) , _dbName( dbName ) , _prefix( prefix ){ _filesNS = dbName + "." + prefix + ".files"; _chunksNS = dbName + "." 
+ prefix + ".chunks"; - + _chunkSize = DEFAULT_CHUNK_SIZE; client.ensureIndex( _filesNS , BSON( "filename" << 1 ) ); client.ensureIndex( _chunksNS , BSON( "files_id" << 1 << "n" << 1 ) ); @@ -60,8 +60,12 @@ } + void GridFS::setChunkSize(unsigned int size) { + massert( 13296 , "invalid chunk size is specified", (size != 0)); + _chunkSize = size; + } + BSONObj GridFS::storeFile( const char* data , size_t length , const string& remoteName , const string& contentType){ - massert( 10279 , "large files not yet implemented", length <= 0xffffffff); char const * const end = data + length; OID id; @@ -70,8 +74,8 @@ int chunkNumber = 0; while (data < end){ - int chunkLen = MIN(DEFAULT_CHUNK_SIZE, (unsigned)(end-data)); - Chunk c(idObj, chunkNumber, data, chunkLen); + int chunkLen = MIN(_chunkSize, (unsigned)(end-data)); + GridFSChunk c(idObj, chunkNumber, data, chunkLen); _client.insert( _chunksNS.c_str() , c._data ); chunkNumber++; @@ -99,33 +103,33 @@ int chunkNumber = 0; gridfs_offset length = 0; while (!feof(fd)){ - boost::scoped_array<char> buf (new char[DEFAULT_CHUNK_SIZE]); - char* bufPos = buf.get(); + //boost::scoped_array<char> buf (new char[_chunkSize+1]); + char * buf = new char[_chunkSize+1]; + char* bufPos = buf;//.get(); unsigned int chunkLen = 0; // how much in the chunk now - while(chunkLen != DEFAULT_CHUNK_SIZE && !feof(fd)){ - int readLen = fread(bufPos, 1, DEFAULT_CHUNK_SIZE - chunkLen, fd); + while(chunkLen != _chunkSize && !feof(fd)){ + int readLen = fread(bufPos, 1, _chunkSize - chunkLen, fd); chunkLen += readLen; bufPos += readLen; - assert(chunkLen <= DEFAULT_CHUNK_SIZE); + assert(chunkLen <= _chunkSize); } - Chunk c(idObj, chunkNumber, buf.get(), chunkLen); + GridFSChunk c(idObj, chunkNumber, buf, chunkLen); _client.insert( _chunksNS.c_str() , c._data ); length += chunkLen; chunkNumber++; + delete[] buf; } if (fd != stdin) fclose( fd ); - massert( 10280 , "large files not yet implemented", length <= 0xffffffff); - return insertFile((remoteName.empty() ? fileName : remoteName), id, length, contentType); } - BSONObj GridFS::insertFile(const string& name, const OID& id, unsigned length, const string& contentType){ + BSONObj GridFS::insertFile(const string& name, const OID& id, gridfs_offset length, const string& contentType){ BSONObj res; if ( ! _client.runCommand( _dbName.c_str() , BSON( "filemd5" << id << "root" << _prefix ) , res ) ) @@ -134,12 +138,17 @@ BSONObjBuilder file; file << "_id" << id << "filename" << name - << "length" << (unsigned) length - << "chunkSize" << DEFAULT_CHUNK_SIZE + << "chunkSize" << _chunkSize << "uploadDate" << DATENOW << "md5" << res["md5"] ; + if (length < 1024*1024*1024){ // 2^30 + file << "length" << (int) length; + }else{ + file << "length" << (long long) length; + } + if (!contentType.empty()) file << "contentType" << contentType; @@ -190,7 +199,7 @@ return meta_element.embeddedObject(); } - Chunk GridFile::getChunk( int n ){ + GridFSChunk GridFile::getChunk( int n ){ _exists(); BSONObjBuilder b; b.appendAs( _obj["_id"] , "files_id" ); @@ -198,7 +207,7 @@ BSONObj o = _grid->_client.findOne( _grid->_chunksNS.c_str() , b.obj() ); uassert( 10014 , "chunk is empty!" , ! 
o.isEmpty() ); - return Chunk(o); + return GridFSChunk(o); } gridfs_offset GridFile::write( ostream & out ){ @@ -207,7 +216,7 @@ const int num = getNumChunks(); for ( int i=0; iupdate( getNS() , q , o ); + conn->update( getNS() , q , o , true ); } @@ -94,4 +123,16 @@ throw UserException( 9003 , (string)"error on Model::save: " + errmsg ); } + BSONObj Model::toObject(){ + BSONObjBuilder b; + serialize( b ); + return b.obj(); + } + + void Model::append( const char * name , BSONObjBuilder& b ){ + BSONObjBuilder bb( b.subobjStart( name ) ); + serialize( bb ); + bb.done(); + } + } // namespace mongo diff -Nru mongodb-1.4.4/client/model.h mongodb-1.6.3/client/model.h --- mongodb-1.4.4/client/model.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/model.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,6 +18,7 @@ #pragma once #include "dbclient.h" +#include "redef_macros.h" namespace mongo { @@ -40,7 +41,9 @@ virtual const char * getNS() = 0; virtual void serialize(BSONObjBuilder& to) = 0; virtual void unserialize(const BSONObj& from) = 0; - + virtual BSONObj toObject(); + virtual void append( const char * name , BSONObjBuilder& b ); + virtual string modelServer() = 0; /** Load a single object. @@ -55,3 +58,5 @@ }; } // namespace mongo + +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/parallel.cpp mongodb-1.6.3/client/parallel.cpp --- mongodb-1.4.4/client/parallel.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/parallel.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,12 +16,13 @@ */ -#include "stdafx.h" +#include "pch.h" #include "parallel.h" #include "connpool.h" #include "../db/queryutil.h" #include "../db/dbmessage.h" #include "../s/util.h" +#include "../s/shard.h" namespace mongo { @@ -31,8 +32,13 @@ _ns = q.ns; _query = q.query.copy(); _options = q.queryOptions; - _fields = q.fields; + _fields = q.fields.copy(); + _batchSize = q.ntoreturn; + if ( _batchSize == 1 ) + _batchSize = 2; + _done = false; + _didInit = false; } ClusteredCursor::ClusteredCursor( const string& ns , const BSONObj& q , int options , const BSONObj& fields ){ @@ -40,37 +46,84 @@ _query = q.getOwned(); _options = options; _fields = fields.getOwned(); + _batchSize = 0; + _done = false; + _didInit = false; } ClusteredCursor::~ClusteredCursor(){ _done = true; // just in case } + + void ClusteredCursor::init(){ + if ( _didInit ) + return; + _didInit = true; + _init(); + } - auto_ptr ClusteredCursor::query( const string& server , int num , BSONObj extra ){ + auto_ptr ClusteredCursor::query( const string& server , int num , BSONObj extra , int skipLeft ){ uassert( 10017 , "cursor already done" , ! _done ); + assert( _didInit ); BSONObj q = _query; if ( ! extra.isEmpty() ){ q = concatQuery( q , extra ); } - ScopedDbConnection conn( server ); - checkShardVersion( conn.conn() , _ns ); + ShardConnection conn( server , _ns ); + + if ( conn.setVersion() ){ + conn.done(); + throw StaleConfigException( _ns , "ClusteredCursor::query ShardConnection had to change" , true ); + } + + if ( logLevel >= 5 ){ + log(5) << "ClusteredCursor::query (" << type() << ") server:" << server + << " ns:" << _ns << " query:" << q << " num:" << num + << " _fields:" << _fields << " options: " << _options << endl; + } + + auto_ptr cursor = + conn->query( _ns , q , num , 0 , ( _fields.isEmpty() ? 0 : &_fields ) , _options , _batchSize == 0 ? 
0 : _batchSize + skipLeft ); - log(5) << "ClusteredCursor::query server:" << server << " ns:" << _ns << " query:" << q << " num:" << num << " _fields:" << _fields << " options: " << _options << endl; - auto_ptr cursor = conn->query( _ns.c_str() , q , num , 0 , ( _fields.isEmpty() ? 0 : &_fields ) , _options ); - if ( cursor->hasResultFlag( QueryResult::ResultFlag_ShardConfigStale ) ) + assert( cursor.get() ); + + if ( cursor->hasResultFlag( ResultFlag_ShardConfigStale ) ){ + conn.done(); throw StaleConfigException( _ns , "ClusteredCursor::query" ); + } + + if ( cursor->hasResultFlag( ResultFlag_ErrSet ) ){ + conn.done(); + BSONObj o = cursor->next(); + throw UserException( o["code"].numberInt() , o["$err"].String() ); + } + + + cursor->attach( &conn ); conn.done(); return cursor; } + BSONObj ClusteredCursor::explain( const string& server , BSONObj extra ){ + BSONObj q = _query; + if ( ! extra.isEmpty() ){ + q = concatQuery( q , extra ); + } + + ShardConnection conn( server , _ns ); + BSONObj o = conn->findOne( _ns , Query( q ).explain() ); + conn.done(); + return o; + } + BSONObj ClusteredCursor::concatQuery( const BSONObj& query , const BSONObj& extraFilter ){ if ( ! query.hasField( "query" ) ) return _concatFilter( query , extraFilter ); - + BSONObjBuilder b; BSONObjIterator i( query ); while ( i.more() ){ @@ -94,6 +147,112 @@ // TODO: should do some simplification here if possibl ideally } + BSONObj ClusteredCursor::explain(){ + BSONObjBuilder b; + b.append( "clusteredType" , type() ); + + long long nscanned = 0; + long long nscannedObjects = 0; + long long n = 0; + long long millis = 0; + double numExplains = 0; + + map > out; + { + _explain( out ); + + BSONObjBuilder x( b.subobjStart( "shards" ) ); + for ( map >::iterator i=out.begin(); i!=out.end(); ++i ){ + string shard = i->first; + list l = i->second; + BSONArrayBuilder y( x.subarrayStart( shard.c_str() ) ); + for ( list::iterator j=l.begin(); j!=l.end(); ++j ){ + BSONObj temp = *j; + y.append( temp ); + + nscanned += temp["nscanned"].numberLong(); + nscannedObjects += temp["nscannedObjects"].numberLong(); + n += temp["n"].numberLong(); + millis += temp["millis"].numberLong(); + numExplains++; + } + y.done(); + } + x.done(); + } + + b.appendNumber( "nscanned" , nscanned ); + b.appendNumber( "nscannedObjects" , nscannedObjects ); + b.appendNumber( "n" , n ); + b.appendNumber( "millisTotal" , millis ); + b.append( "millisAvg" , (int)((double)millis / numExplains ) ); + b.append( "numQueries" , (int)numExplains ); + b.append( "numShards" , (int)out.size() ); + + return b.obj(); + } + + // -------- FilteringClientCursor ----------- + FilteringClientCursor::FilteringClientCursor( const BSONObj filter ) + : _matcher( filter ) , _done( true ){ + } + + FilteringClientCursor::FilteringClientCursor( auto_ptr cursor , const BSONObj filter ) + : _matcher( filter ) , _cursor( cursor ) , _done( cursor.get() == 0 ){ + } + + FilteringClientCursor::~FilteringClientCursor(){ + } + + void FilteringClientCursor::reset( auto_ptr cursor ){ + _cursor = cursor; + _next = BSONObj(); + _done = _cursor.get() == 0; + } + + bool FilteringClientCursor::more(){ + if ( ! _next.isEmpty() ) + return true; + + if ( _done ) + return false; + + _advance(); + return ! _next.isEmpty(); + } + + BSONObj FilteringClientCursor::next(){ + assert( ! _next.isEmpty() ); + assert( ! 
_done ); + + BSONObj ret = _next; + _next = BSONObj(); + _advance(); + return ret; + } + + BSONObj FilteringClientCursor::peek(){ + if ( _next.isEmpty() ) + _advance(); + return _next; + } + + void FilteringClientCursor::_advance(){ + assert( _next.isEmpty() ); + if ( ! _cursor.get() || _done ) + return; + + while ( _cursor->more() ){ + _next = _cursor->next(); + if ( _matcher.matches( _next ) ){ + if ( ! _cursor->moreInCurrentBatch() ) + _next = _next.getOwned(); + return; + } + _next = BSONObj(); + } + _done = true; + } // -------- SerialServerClusteredCursor ----------- @@ -107,10 +266,21 @@ sort( _servers.rbegin() , _servers.rend() ); _serverIndex = 0; + + _needToSkip = q.ntoskip; } bool SerialServerClusteredCursor::more(){ - if ( _current.get() && _current->more() ) + + // TODO: optimize this by sending on first query and then back counting + // tricky in case where 1st server doesn't have any after + // need it to send n skipped + while ( _needToSkip > 0 && _current.more() ){ + _current.next(); + _needToSkip--; + } + + if ( _current.more() ) return true; if ( _serverIndex >= _servers.size() ){ @@ -119,17 +289,21 @@ ServerAndQuery& sq = _servers[_serverIndex++]; - _current = query( sq._server , 0 , sq._extra ); - if ( _current->more() ) - return true; - - // this sq has nothing, so keep looking + _current.reset( query( sq._server , 0 , sq._extra ) ); return more(); } BSONObj SerialServerClusteredCursor::next(){ uassert( 10018 , "no more items" , more() ); - return _current->next(); + return _current.next(); + } + + void SerialServerClusteredCursor::_explain( map< string,list >& out ){ + for ( unsigned i=0; i<_servers.size(); i++ ){ + ServerAndQuery& sq = _servers[i]; + list & l = out[sq._server]; + l.push_back( explain( sq._server , sq._extra ) ); + } } // -------- ParallelSortClusteredCursor ----------- @@ -138,7 +312,8 @@ const BSONObj& sortKey ) : ClusteredCursor( q ) , _servers( servers ){ _sortKey = sortKey.getOwned(); - _init(); + _needToSkip = q.ntoskip; + _finishCons(); } ParallelSortClusteredCursor::ParallelSortClusteredCursor( const set& servers , const string& ns , @@ -146,85 +321,123 @@ int options , const BSONObj& fields ) : ClusteredCursor( ns , q.obj , options , fields ) , _servers( servers ){ _sortKey = q.getSort().copy(); - _init(); + _needToSkip = 0; + _finishCons(); } - void ParallelSortClusteredCursor::_init(){ + void ParallelSortClusteredCursor::_finishCons(){ _numServers = _servers.size(); - _cursors = new auto_ptr[_numServers]; - _nexts = new BSONObj[_numServers]; + _cursors = 0; + + if ( ! _sortKey.isEmpty() && ! _fields.isEmpty() ){ + // we need to make sure the sort key is in the project + bool isNegative = false; + BSONObjBuilder b; + { + BSONObjIterator i( _fields ); + while ( i.more() ){ + BSONElement e = i.next(); + b.append( e ); + if ( ! e.trueValue() ) + isNegative = true; + } + } + + { + BSONObjIterator i( _sortKey ); + while ( i.more() ){ + BSONElement e = i.next(); + BSONElement f = _fields.getField( e.fieldName() ); + if ( isNegative ){ + uassert( 13431 , "have to have sort key in projection and removing it" , f.eoo() ); + } + else if ( f.eoo() ){ + // add to projection + b.append( e ); + } + } + } + + _fields = b.obj(); + } + } + + void ParallelSortClusteredCursor::_init(){ + assert( ! 
_cursors ); + _cursors = new FilteringClientCursor[_numServers]; // TODO: parellize int num = 0; - for ( set::iterator i = _servers.begin(); i!=_servers.end(); i++ ){ + for ( set::iterator i = _servers.begin(); i!=_servers.end(); ++i ){ const ServerAndQuery& sq = *i; - _cursors[num++] = query( sq._server , 0 , sq._extra ); + _cursors[num++].reset( query( sq._server , 0 , sq._extra , _needToSkip ) ); } } ParallelSortClusteredCursor::~ParallelSortClusteredCursor(){ delete [] _cursors; - delete [] _nexts; + _cursors = 0; } bool ParallelSortClusteredCursor::more(){ - for ( int i=0; i<_numServers; i++ ){ - if ( ! _nexts[i].isEmpty() ) - return true; - if ( _cursors[i].get() && _cursors[i]->more() ) + if ( _needToSkip > 0 ){ + int n = _needToSkip; + _needToSkip = 0; + + while ( n > 0 && more() ){ + BSONObj x = next(); + n--; + } + + _needToSkip = n; + } + + for ( int i=0; i<_numServers; i++ ){ + if ( _cursors[i].more() ) return true; } return false; } BSONObj ParallelSortClusteredCursor::next(){ - advance(); - BSONObj best = BSONObj(); int bestFrom = -1; for ( int i=0; i<_numServers; i++){ - if ( _nexts[i].isEmpty() ) + if ( ! _cursors[i].more() ) continue; + + BSONObj me = _cursors[i].peek(); if ( best.isEmpty() ){ - best = _nexts[i]; + best = me; bestFrom = i; continue; } - int comp = best.woSortOrder( _nexts[i] , _sortKey ); + int comp = best.woSortOrder( me , _sortKey , true ); if ( comp < 0 ) continue; - best = _nexts[i]; + best = me; bestFrom = i; } - + uassert( 10019 , "no more elements" , ! best.isEmpty() ); - _nexts[bestFrom] = BSONObj(); + _cursors[bestFrom].next(); return best; } - void ParallelSortClusteredCursor::advance(){ - for ( int i=0; i<_numServers; i++ ){ - - if ( ! _nexts[i].isEmpty() ){ - // already have a good object there - continue; - } - - if ( ! _cursors[i]->more() ){ - // cursor is dead, oh well - continue; - } - - _nexts[i] = _cursors[i]->next(); + void ParallelSortClusteredCursor::_explain( map< string,list >& out ){ + for ( set::iterator i=_servers.begin(); i!=_servers.end(); ++i ){ + const ServerAndQuery& sq = *i; + list & l = out[sq._server]; + l.push_back( explain( sq._server , sq._extra ) ); } - + } // ----------------- @@ -239,36 +452,31 @@ } bool Future::CommandResult::join(){ - while ( ! 
_done ) - sleepmicros( 50 ); + _thr->join(); + assert( _done ); return _ok; } - void Future::commandThread(){ - assert( _grab ); - shared_ptr res = *_grab; - _grab = 0; - - ScopedDbConnection conn( res->_server ); - res->_ok = conn->runCommand( res->_db , res->_cmd , res->_res ); + void Future::commandThread( shared_ptr res ){ + setThreadName( "future" ); + + try { + ScopedDbConnection conn( res->_server ); + res->_ok = conn->runCommand( res->_db , res->_cmd , res->_res ); + conn.done(); + } + catch ( std::exception& e ){ + error() << "Future::commandThread exception: " << e.what() << endl; + res->_ok = false; + } res->_done = true; } shared_ptr Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd ){ - shared_ptr res; - res.reset( new Future::CommandResult( server , db , cmd ) ); - - _grab = &res; - - boost::thread thr( Future::commandThread ); - - while ( _grab ) - sleepmicros(2); - + shared_ptr res( new Future::CommandResult( server , db , cmd ) ); + res->_thr.reset( new boost::thread( boost::bind( Future::commandThread , res ) ) ); return res; } - - shared_ptr * Future::_grab; } diff -Nru mongodb-1.4.4/client/parallel.h mongodb-1.6.3/client/parallel.h --- mongodb-1.4.4/client/parallel.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/parallel.h 2010-09-24 10:02:42.000000000 -0700 @@ -16,16 +16,53 @@ */ /** - tools for wokring in parallel/sharded/clustered environment + tools for working in parallel/sharded/clustered environment */ -#include "../stdafx.h" +#include "../pch.h" #include "dbclient.h" +#include "redef_macros.h" #include "../db/dbmessage.h" +#include "../db/matcher.h" namespace mongo { /** + * holder for a server address and a query to run + */ + class ServerAndQuery { + public: + ServerAndQuery( const string& server , BSONObj extra = BSONObj() , BSONObj orderObject = BSONObj() ) : + _server( server ) , _extra( extra.getOwned() ) , _orderObject( orderObject.getOwned() ){ + } + + bool operator<( const ServerAndQuery& other ) const{ + if ( ! 
_orderObject.isEmpty() ) + return _orderObject.woCompare( other._orderObject ) < 0; + + if ( _server < other._server ) + return true; + if ( other._server > _server ) + return false; + return _extra.woCompare( other._extra ) < 0; + } + + string toString() const { + StringBuilder ss; + ss << "server:" << _server << " _extra:" << _extra.toString() << " _orderObject:" << _orderObject.toString(); + return ss.str(); + } + + operator string() const { + return toString(); + } + + string _server; + BSONObj _extra; + BSONObj _orderObject; + }; + + /** * this is a cursor that works over a set of servers * can be used in serial/paralellel as controlled by sub classes */ @@ -34,7 +71,10 @@ ClusteredCursor( QueryMessage& q ); ClusteredCursor( const string& ns , const BSONObj& q , int options=0 , const BSONObj& fields=BSONObj() ); virtual ~ClusteredCursor(); - + + /** call before using */ + void init(); + virtual bool more() = 0; virtual BSONObj next() = 0; @@ -42,53 +82,105 @@ virtual string type() const = 0; + virtual BSONObj explain(); + protected: - auto_ptr query( const string& server , int num = 0 , BSONObj extraFilter = BSONObj() ); + + virtual void _init() = 0; + auto_ptr query( const string& server , int num = 0 , BSONObj extraFilter = BSONObj() , int skipLeft = 0 ); + BSONObj explain( const string& server , BSONObj extraFilter = BSONObj() ); + static BSONObj _concatFilter( const BSONObj& filter , const BSONObj& extraFilter ); + virtual void _explain( map< string,list >& out ) = 0; + string _ns; BSONObj _query; int _options; BSONObj _fields; + int _batchSize; + + bool _didInit; bool _done; }; - /** - * holder for a server address and a query to run - */ - class ServerAndQuery { + class FilteringClientCursor { public: - ServerAndQuery( const string& server , BSONObj extra = BSONObj() , BSONObj orderObject = BSONObj() ) : - _server( server ) , _extra( extra.getOwned() ) , _orderObject( orderObject.getOwned() ){ + FilteringClientCursor( const BSONObj filter = BSONObj() ); + FilteringClientCursor( auto_ptr cursor , const BSONObj filter = BSONObj() ); + ~FilteringClientCursor(); + + void reset( auto_ptr cursor ); + + bool more(); + BSONObj next(); + + BSONObj peek(); + private: + void _advance(); + + Matcher _matcher; + auto_ptr _cursor; + + BSONObj _next; + bool _done; + }; + + + class Servers { + public: + Servers(){ + } + + void add( const ServerAndQuery& s ){ + add( s._server , s._extra ); } + + void add( const string& server , const BSONObj& filter ){ + vector& mine = _filters[server]; + mine.push_back( filter.getOwned() ); + } + + // TOOO: pick a less horrible name + class View { + View( const Servers* s ){ + for ( map >::const_iterator i=s->_filters.begin(); i!=s->_filters.end(); ++i ){ + _servers.push_back( i->first ); + _filters.push_back( i->second ); + } + } + public: + int size() const { + return _servers.size(); + } - bool operator<( const ServerAndQuery& other ) const{ - if ( ! 
_orderObject.isEmpty() ) - return _orderObject.woCompare( other._orderObject ) < 0; + string getServer( int n ) const { + return _servers[n]; + } + + vector getFilter( int n ) const { + return _filters[ n ]; + } - if ( _server < other._server ) - return true; - if ( other._server > _server ) - return false; - return _extra.woCompare( other._extra ) < 0; - } + private: + vector _servers; + vector< vector > _filters; - string toString() const { - StringBuilder ss; - ss << "server:" << _server << " _extra:" << _extra << " _orderObject:" << _orderObject; - return ss.str(); - } + friend class Servers; + }; - operator string() const { - return toString(); + View view() const { + return View( this ); } + - string _server; - BSONObj _extra; - BSONObj _orderObject; + private: + map > _filters; + + friend class View; }; @@ -102,11 +194,18 @@ virtual bool more(); virtual BSONObj next(); virtual string type() const { return "SerialServer"; } - private: + + protected: + virtual void _explain( map< string,list >& out ); + + void _init(){} + vector _servers; unsigned _serverIndex; - auto_ptr _current; + FilteringClientCursor _current; + + int _needToSkip; }; @@ -123,17 +222,18 @@ virtual bool more(); virtual BSONObj next(); virtual string type() const { return "ParallelSort"; } - private: + protected: + void _finishCons(); void _init(); - - void advance(); + + virtual void _explain( map< string,list >& out ); int _numServers; set _servers; BSONObj _sortKey; - - auto_ptr * _cursors; - BSONObj * _nexts; + + FilteringClientCursor * _cursors; + int _needToSkip; }; /** @@ -174,7 +274,7 @@ string _db; BSONObj _cmd; - boost::thread _thr; + scoped_ptr _thr; BSONObj _res; bool _done; @@ -183,13 +283,12 @@ friend class Future; }; - static void commandThread(); + static void commandThread( shared_ptr res ); static shared_ptr spawnCommand( const string& server , const string& db , const BSONObj& cmd ); - - private: - static shared_ptr * _grab; }; } + +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/redef_macros.h mongodb-1.6.3/client/redef_macros.h --- mongodb-1.4.4/client/redef_macros.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/redef_macros.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,55 @@ +/** @file redef_macros.h - redefine macros from undef_macros.h */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// If you define a new global un-prefixed macro, please add it here and in undef_macros + +// #pragma once // this file is intended to be processed multiple times + +#if defined(MONGO_MACROS_CLEANED) + +// util/allocator.h +#define malloc MONGO_malloc +#define realloc MONGO_realloc + +// util/assert_util.h +#define assert MONGO_assert +#define dassert MONGO_dassert +#define wassert MONGO_wassert +#define massert MONGO_massert +#define uassert MONGO_uassert +#define BOOST_CHECK_EXCEPTION MONGO_BOOST_CHECK_EXCEPTION +#define DESTRUCTOR_GUARD MONGO_DESTRUCTOR_GUARD + +// util/goodies.h +#define PRINT MONGO_PRINT +#define PRINTFL MONGO_PRINTFL +#define asctime MONGO_asctime +#define gmtime MONGO_gmtime +#define localtime MONGO_localtime +#define ctime MONGO_ctime + +// util/debug_util.h +#define DEV MONGO_DEV +#define DEBUGGING MONGO_DEBUGGING +#define SOMETIMES MONGO_SOMETIMES +#define OCCASIONALLY MONGO_OCCASIONALLY +#define RARELY MONGO_RARELY +#define ONCE MONGO_ONCE + +#undef MONGO_MACROS_CLEANED +#endif + diff -Nru mongodb-1.4.4/client/syncclusterconnection.cpp mongodb-1.6.3/client/syncclusterconnection.cpp --- mongodb-1.4.4/client/syncclusterconnection.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/syncclusterconnection.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,15 +16,29 @@ */ -#include "stdafx.h" +#include "pch.h" #include "syncclusterconnection.h" #include "../db/dbmessage.h" // error codes 8000-8009 namespace mongo { + + SyncClusterConnection::SyncClusterConnection( const list & L) : _mutex("SynClusterConnection") { + { + stringstream s; + int n=0; + for( list::const_iterator i = L.begin(); i != L.end(); i++ ) { + if( ++n > 1 ) s << ','; + s << i->toString(); + } + _address = s.str(); + } + for( list::const_iterator i = L.begin(); i != L.end(); i++ ) + _connect( i->toString() ); + } - SyncClusterConnection::SyncClusterConnection( string commaSeperated ){ + SyncClusterConnection::SyncClusterConnection( string commaSeperated ) : _mutex("SyncClusterConnection") { _address = commaSeperated; string::size_type idx; while ( ( idx = commaSeperated.find( ',' ) ) != string::npos ){ @@ -36,7 +50,7 @@ uassert( 8004 , "SyncClusterConnection needs 3 servers" , _conns.size() == 3 ); } - SyncClusterConnection::SyncClusterConnection( string a , string b , string c ){ + SyncClusterConnection::SyncClusterConnection( string a , string b , string c ) : _mutex("SyncClusterConnection") { _address = a + "," + b + "," + c; // connect to all even if not working _connect( a ); @@ -44,7 +58,7 @@ _connect( c ); } - SyncClusterConnection::SyncClusterConnection( SyncClusterConnection& prev ){ + SyncClusterConnection::SyncClusterConnection( SyncClusterConnection& prev ) : _mutex("SyncClusterConnection") { assert(0); } @@ -55,6 +69,7 @@ } bool SyncClusterConnection::prepare( string& errmsg ){ + _lastErrors.clear(); return fsync( errmsg ); } @@ -79,7 +94,7 @@ } void SyncClusterConnection::_checkLast(){ - vector all; + _lastErrors.clear(); vector errors; for ( size_t i=0; i<_conns.size(); i++ ){ @@ -95,17 +110,17 @@ catch ( ... 
){ err += "unknown failure"; } - all.push_back( res ); + _lastErrors.push_back( res.getOwned() ); errors.push_back( err ); } - - assert( all.size() == errors.size() && all.size() == _conns.size() ); + + assert( _lastErrors.size() == errors.size() && _lastErrors.size() == _conns.size() ); stringstream err; bool ok = true; for ( size_t i = 0; i<_conns.size(); i++ ){ - BSONObj res = all[i]; + BSONObj res = _lastErrors[i]; if ( res["ok"].trueValue() && res["fsyncFiles"].numberInt() > 0 ) continue; ok = false; @@ -117,35 +132,71 @@ throw UserException( 8001 , (string)"SyncClusterConnection write op failed: " + err.str() ); } + BSONObj SyncClusterConnection::getLastErrorDetailed(){ + if ( _lastErrors.size() ) + return _lastErrors[0]; + return DBClientBase::getLastErrorDetailed(); + } + void SyncClusterConnection::_connect( string host ){ log() << "SyncClusterConnection connecting to [" << host << "]" << endl; DBClientConnection * c = new DBClientConnection( true ); string errmsg; if ( ! c->connect( host , errmsg ) ) log() << "SyncClusterConnection connect fail to: " << host << " errmsg: " << errmsg << endl; + _connAddresses.push_back( host ); _conns.push_back( c ); } - auto_ptr SyncClusterConnection::query(const string &ns, Query query, int nToReturn, int nToSkip, - const BSONObj *fieldsToReturn, int queryOptions, int batchSize ){ + bool SyncClusterConnection::callRead( Message& toSend , Message& response ){ + // TODO: need to save state of which one to go back to somehow... + return _conns[0]->callRead( toSend , response ); + } + BSONObj SyncClusterConnection::findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions) { + if ( ns.find( ".$cmd" ) != string::npos ){ string cmdName = query.obj.firstElement().fieldName(); - int lockType = 0; - - map::iterator i = _lockTypes.find( cmdName ); - if ( i == _lockTypes.end() ){ - BSONObj info; - uassert( 13053 , "help failed" , _commandOnActive( "admin" , BSON( cmdName << "1" << "help" << 1 ) , info ) ); - lockType = info["lockType"].numberInt(); - _lockTypes[cmdName] = lockType; - } - else { - lockType = i->second; + int lockType = _lockType( cmdName ); + + if ( lockType > 0 ){ // write $cmd + string errmsg; + if ( ! 
prepare( errmsg ) )
+                throw UserException( 13104 , (string)"SyncClusterConnection::findOne prepare failed: " + errmsg );
+
+            vector<BSONObj> all;
+            for ( size_t i=0; i<_conns.size(); i++ ){
+                all.push_back( _conns[i]->findOne( ns , query , 0 , queryOptions ).getOwned() );
+            }
+
+            _checkLast();
+
+            for ( size_t i=0; itoString();
+                throw UserException( 13105 , ss.str() );
+            }
+
+            return all[0];
         }
-
-        uassert( 13054 , (string)"write $cmd not supported in SyncClusterConnection: " + cmdName , lockType <= 0 );
+        }
+
+        return DBClientBase::findOne( ns , query , fieldsToReturn , queryOptions );
+    }
+
+
+    auto_ptr<DBClientCursor> SyncClusterConnection::query(const string &ns, Query query, int nToReturn, int nToSkip,
+                                                          const BSONObj *fieldsToReturn, int queryOptions, int batchSize ){
+        _lastErrors.clear();
+        if ( ns.find( ".$cmd" ) != string::npos ){
+            string cmdName = query.obj.firstElement().fieldName();
+            int lockType = _lockType( cmdName );
+            uassert( 13054 , (string)"write $cmd not supported in SyncClusterConnection::query for:" + cmdName , lockType <= 0 );
         }
         return _queryOnActive( ns , query , nToReturn , nToSkip , fieldsToReturn , queryOptions , batchSize );
@@ -185,6 +236,10 @@
     }
     void SyncClusterConnection::insert( const string &ns, BSONObj obj ){
+
+        uassert( 13119 , (string)"SyncClusterConnection::insert obj has to have an _id: " + obj.jsonString() ,
+                 ns.find( ".system.indexes" ) != string::npos || obj["_id"].type() );
+
         string errmsg;
         if ( ! prepare( errmsg ) )
             throw UserException( 8003 , (string)"SyncClusterConnection::insert prepare failed: " + errmsg );
@@ -201,19 +256,52 @@
     }
     void SyncClusterConnection::remove( const string &ns , Query query, bool justOne ){
-        assert(0);
+        string errmsg;
+        if ( ! prepare( errmsg ) )
+            throw UserException( 8020 , (string)"SyncClusterConnection::remove prepare failed: " + errmsg );
+
+        for ( size_t i=0; i<_conns.size(); i++ ){
+            _conns[i]->remove( ns , query , justOne );
+        }
+
+        _checkLast();
     }
     void SyncClusterConnection::update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ){
-        string errmsg;
-        if ( ! prepare( errmsg ) )
-            throw UserException( 8005 , (string)"SyncClusterConnection::udpate prepare failed: " + errmsg );
+
+        if ( upsert ){
+            uassert( 13120 , "SyncClusterConnection::update upsert query needs _id" , query.obj["_id"].type() );
+        }
+
+        if ( _writeConcern ){
+            string errmsg;
+            if ( ! prepare( errmsg ) )
+                throw UserException( 8005 , (string)"SyncClusterConnection::update prepare failed: " + errmsg );
+        }
+
         for ( size_t i=0; i<_conns.size(); i++ ){
-            _conns[i]->update( ns , query , obj , upsert , multi );
+            try {
+                _conns[i]->update( ns , query , obj , upsert , multi );
+            }
+            catch ( std::exception& e ){
+                if ( _writeConcern )
+                    throw e;
+            }
         }
-        _checkLast();
+
+        if ( _writeConcern ){
+            _checkLast();
+            assert( _lastErrors.size() > 1 );
+
+            int a = _lastErrors[0]["n"].numberInt();
+            for ( unsigned i=1; i<_lastErrors.size(); i++ ){
+                int b = _lastErrors[i]["n"].numberInt();
+                if ( a == b )
+                    continue;
+
+                throw UpdateNotTheSame( 8017 , "update not consistent" , _connAddresses , _lastErrors );
+            }
+        }
     }
     string SyncClusterConnection::_toString() const {
@@ -244,12 +332,53 @@
     }
     void SyncClusterConnection::say( Message &toSend ){
-        assert(0);
+        string errmsg;
+        if ( ! 
prepare( errmsg ) ) + throw UserException( 13397 , (string)"SyncClusterConnection::say prepare failed: " + errmsg ); + + for ( size_t i=0; i<_conns.size(); i++ ){ + _conns[i]->say( toSend ); + } + + _checkLast(); } void SyncClusterConnection::sayPiggyBack( Message &toSend ){ assert(0); } + int SyncClusterConnection::_lockType( const string& name ){ + { + scoped_lock lk(_mutex); + map::iterator i = _lockTypes.find( name ); + if ( i != _lockTypes.end() ) + return i->second; + } + + BSONObj info; + uassert( 13053 , "help failed" , _commandOnActive( "admin" , BSON( name << "1" << "help" << 1 ) , info ) ); + + int lockType = info["lockType"].numberInt(); + + scoped_lock lk(_mutex); + _lockTypes[name] = lockType; + return lockType; + } + + void SyncClusterConnection::killCursor( long long cursorID ){ + // should never need to do this + assert(0); + } + + bool SyncClusterConnection::isMember( const DBConnector * conn ) const { + if ( conn == this ) + return true; + + for ( unsigned i=0; i<_conns.size(); i++ ) + if ( _conns[i]->isMember( conn ) ) + return true; + + return false; + } } diff -Nru mongodb-1.4.4/client/syncclusterconnection.h mongodb-1.6.3/client/syncclusterconnection.h --- mongodb-1.4.4/client/syncclusterconnection.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/client/syncclusterconnection.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,5 @@ -// syncclusterconnection.h +// @file syncclusterconnection.h + /* * Copyright 2010 10gen Inc. * @@ -16,25 +17,36 @@ */ -#include "../stdafx.h" +#include "../pch.h" #include "dbclient.h" +#include "redef_macros.h" namespace mongo { /** - * this is a connection to a cluster of servers that operate as one - * for super high durability + * This is a connection to a cluster of servers that operate as one + * for super high durability. + * + * Write operations are two-phase. First, all nodes are asked to fsync. If successful + * everywhere, the write is sent everywhere and then followed by an fsync. There is no + * rollback if a problem occurs during the second phase. Naturally, with all these fsyncs, + * these operations will be quite slow -- use sparingly. + * + * Read operations are sent to a single random node. + * + * The class checks if a command is read or write style, and sends to a single + * node if a read lock command and to all in two phases with a write style command. 
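+ *
+ * Illustrative usage sketch (editorial addition, not part of the source; the
+ * host names and namespace below are placeholders). The insert runs the
+ * two-phase fsync/write/fsync sequence against all three nodes, while the
+ * findOne is served by a single node:
+ *
+ *     SyncClusterConnection conn( "cfg1.example.net" , "cfg2.example.net" , "cfg3.example.net" );
+ *     conn.insert( "config.settings" , BSON( "_id" << "chunksize" << "value" << 64 ) ); // note: _id is required
+ *     BSONObj doc = conn.findOne( "config.settings" , QUERY( "_id" << "chunksize" ) , 0 , 0 );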
*/ class SyncClusterConnection : public DBClientBase { public: /** - * @param commaSeperated should be 3 hosts comma seperated + * @param commaSeparated should be 3 hosts comma separated */ - SyncClusterConnection( string commaSeperated ); + SyncClusterConnection( const list & ); + SyncClusterConnection( string commaSeparated ); SyncClusterConnection( string a , string b , string c ); ~SyncClusterConnection(); - /** * @return true if all servers are up and ready for writes */ @@ -47,6 +59,8 @@ // --- from DBClientInterface + virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions); + virtual auto_ptr query(const string &ns, Query query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, int batchSize ); @@ -60,41 +74,67 @@ virtual void update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ); - virtual string toString(){ - return _toString(); - } - virtual bool call( Message &toSend, Message &response, bool assertOk ); virtual void say( Message &toSend ); virtual void sayPiggyBack( Message &toSend ); + + virtual void killCursor( long long cursorID ); virtual string getServerAddress() const { return _address; } + virtual bool isFailed() const { return false; } + virtual string toString() { return _toString(); } - virtual bool isFailed() const { - return false; - } + virtual BSONObj getLastErrorDetailed(); + + virtual bool callRead( Message& toSend , Message& response ); + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::SYNC; } + + virtual bool isMember( const DBConnector * conn ) const; private: - SyncClusterConnection( SyncClusterConnection& prev ); - - string _toString() const; - + string _toString() const; bool _commandOnActive(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); - auto_ptr _queryOnActive(const string &ns, Query query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, int batchSize ); - - bool _isReadOnly( const string& name ); - + int _lockType( const string& name ); void _checkLast(); - void _connect( string host ); string _address; + vector _connAddresses; vector _conns; map _lockTypes; + mongo::mutex _mutex; + + vector _lastErrors; }; + class UpdateNotTheSame : public UserException { + public: + UpdateNotTheSame( int code , const string& msg , const vector& addrs , const vector& lastErrors ) + : UserException( code , msg ) , _addrs( addrs ) , _lastErrors( lastErrors ){ + assert( _addrs.size() == _lastErrors.size() ); + } + + virtual ~UpdateNotTheSame() throw() { + } + + unsigned size() const { + return _addrs.size(); + } + pair operator[](unsigned i) const { + return make_pair( _addrs[i] , _lastErrors[i] ); + } + + private: + + vector _addrs; + vector _lastErrors; + }; + }; + +#include "undef_macros.h" diff -Nru mongodb-1.4.4/client/undef_macros.h mongodb-1.6.3/client/undef_macros.h --- mongodb-1.4.4/client/undef_macros.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/client/undef_macros.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,58 @@ +/** @file undef_macros.h - remove mongo-specific macros that might cause issues */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// If you define a new global un-prefixed macro, please add it here and in redef_macros + +// #pragma once // this file is intended to be processed multiple times + + +/** MONGO_EXPOSE_MACROS - when defined, indicates that you are compiling a mongo program rather + than just using the C++ driver. +*/ +#if !defined(MONGO_EXPOSE_MACROS) && !defined(MONGO_MACROS_CLEANED) + +// util/allocator.h +#undef malloc +#undef realloc + +// util/assert_util.h +#undef assert +#undef dassert +#undef wassert +#undef massert +#undef uassert +#undef BOOST_CHECK_EXCEPTION +#undef DESTRUCTOR_GUARD + +// util/goodies.h +#undef PRINT +#undef PRINTFL +#undef asctime +#undef gmtime +#undef localtime +#undef ctime + +// util/debug_util.h +#undef DEV +#undef DEBUGGING +#undef SOMETIMES +#undef OCCASIONALLY +#undef RARELY +#undef ONCE + +#define MONGO_MACROS_CLEANED +#endif diff -Nru mongodb-1.4.4/db/btree.cpp mongodb-1.6.3/db/btree.cpp --- mongodb-1.4.4/db/btree.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/btree.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "btree.h" #include "pdfile.h" @@ -55,8 +55,8 @@ } int BucketBasics::Size() const { - assert( _Size == BucketSize ); - return _Size; + assert( _wasSize == BucketSize ); + return BucketSize; } inline void BucketBasics::setNotPacked() { flags &= ~Packed; @@ -84,7 +84,7 @@ bt_dmp=0; } - int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order) { + int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount) { { bool f = false; assert( f = true ); @@ -107,18 +107,24 @@ for ( int i = 0; i < n; i++ ) { _KeyNode& kn = k(i); - if ( kn.isUsed() ) kc++; + if ( kn.isUsed() ) { + kc++; + } else { + if ( unusedCount ) { + ++( *unusedCount ); + } + } if ( !kn.prevChildBucket.isNull() ) { DiskLoc left = kn.prevChildBucket; BtreeBucket *b = left.btree(); wassert( b->parent == thisLoc ); - kc += b->fullValidate(kn.prevChildBucket, order); + kc += b->fullValidate(kn.prevChildBucket, order, unusedCount); } } if ( !nextChild.isNull() ) { BtreeBucket *b = nextChild.btree(); wassert( b->parent == thisLoc ); - kc += b->fullValidate(nextChild, order); + kc += b->fullValidate(nextChild, order, unusedCount); } return kc; @@ -126,7 +132,7 @@ int nDumped = 0; - void BucketBasics::assertValid(const BSONObj &order, bool force) { + void BucketBasics::assertValid(const Ordering &order, bool force) { if ( !debug && !force ) return; wassert( n >= 0 && n < Size() ); @@ -183,13 +189,14 @@ } inline int BucketBasics::totalDataSize() const { - return Size() - (data-(char*)this); + return (int) (Size() - (data-(char*)this)); } void BucketBasics::init() { parent.Null(); nextChild.Null(); - _Size = BucketSize; + _wasSize = BucketSize; + _reserved1 = 0; flags = Packed; n = 0; emptySize = totalDataSize(); @@ -247,7 +254,7 @@ } /* add a key. must be > all existing. be careful to set next ptr right. 
*/ - bool BucketBasics::_pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild) { + bool BucketBasics::_pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild) { int bytesNeeded = key.objsize() + sizeof(_KeyNode); if ( bytesNeeded > emptySize ) return false; @@ -268,12 +275,12 @@ }*/ /* insert a key in a bucket with no complexity -- no splits required */ - bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int keypos, const DiskLoc& recordLoc, const BSONObj& key, const BSONObj &order) { + bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order) { modified(thisLoc); assert( keypos >= 0 && keypos <= n ); int bytesNeeded = key.objsize() + sizeof(_KeyNode); if ( bytesNeeded > emptySize ) { - pack( order ); + pack( order, keypos ); if ( bytesNeeded > emptySize ) return false; } @@ -293,7 +300,7 @@ /* when we delete things we just leave empty space until the node is full and then we repack it. */ - void BucketBasics::pack( const BSONObj &order ) { + void BucketBasics::pack( const Ordering &order, int &refPos ) { if ( flags & Packed ) return; @@ -301,14 +308,29 @@ char temp[BucketSize]; int ofs = tdz; topSize = 0; + int i = 0; for ( int j = 0; j < n; j++ ) { - short ofsold = k(j).keyDataOfs(); - int sz = keyNode(j).key.objsize(); + if( j > 0 && ( j != refPos ) && k( j ).isUnused() && k( j ).prevChildBucket.isNull() ) { + continue; // key is unused and has no children - drop it + } + if( i != j ) { + if ( refPos == j ) { + refPos = i; // i < j so j will never be refPos again + } + k( i ) = k( j ); + } + short ofsold = k(i).keyDataOfs(); + int sz = keyNode(i).key.objsize(); ofs -= sz; topSize += sz; memcpy(temp+ofs, dataAt(ofsold), sz); - k(j).setKeyDataOfsSavingUse( ofs ); + k(i).setKeyDataOfsSavingUse( ofs ); + ++i; } + if ( refPos == n ) { + refPos = i; + } + n = i; int dataUsed = tdz - ofs; memcpy(data + ofs, temp + ofs, dataUsed); emptySize = tdz - dataUsed - n * sizeof(_KeyNode); @@ -318,10 +340,10 @@ assertValid( order ); } - inline void BucketBasics::truncateTo(int N, const BSONObj &order) { + inline void BucketBasics::truncateTo(int N, const Ordering &order, int &refPos) { n = N; setNotPacked(); - pack( order ); + pack( order, refPos ); } /* - BtreeBucket --------------------------------------------------- */ @@ -343,8 +365,37 @@ break; } } + + int BtreeBucket::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, const vector< const BSONElement * > &rEnd, const Ordering &o ) { + BSONObjIterator ll( l ); + BSONObjIterator rr( rBegin ); + vector< const BSONElement * >::const_iterator rr2 = rEnd.begin(); + unsigned mask = 1; + for( int i = 0; i < rBeginLen; ++i, mask <<= 1 ) { + BSONElement lll = ll.next(); + BSONElement rrr = rr.next(); + ++rr2; + + int x = lll.woCompare( rrr, false ); + if ( o.descending( mask ) ) + x = -x; + if ( x != 0 ) + return x; + } + for( ; ll.more(); mask <<= 1 ) { + BSONElement lll = ll.next(); + BSONElement rrr = **rr2; + ++rr2; + int x = lll.woCompare( rrr, false ); + if ( o.descending( mask ) ) + x = -x; + if ( x != 0 ) + return x; + } + return 0; + } - bool BtreeBucket::exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, BSONObj order) { + bool BtreeBucket::exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, const Ordering& order) { int pos; bool found; DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc); @@ -367,7 +418,7 @@ */ bool 
BtreeBucket::wouldCreateDup( const IndexDetails& idx, DiskLoc thisLoc, - const BSONObj& key, BSONObj order, + const BSONObj& key, const Ordering& order, DiskLoc self) { int pos; @@ -411,7 +462,7 @@ note result might be an Unused location! */ char foo; - bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const BSONObj &order, int& pos, bool assertIfDup) { + bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const Ordering &order, int& pos, bool assertIfDup) { #if defined(_EXPERIMENT1) { char *z = (char *) this; @@ -509,11 +560,11 @@ assert(false); } found: - deallocBucket( thisLoc ); + deallocBucket( thisLoc, id ); } - void BtreeBucket::deallocBucket(const DiskLoc &thisLoc) { -#if 1 + void BtreeBucket::deallocBucket(const DiskLoc &thisLoc, IndexDetails &id) { +#if 0 /* as a temporary defensive measure, we zap the whole bucket, AND don't truly delete it (meaning it is ineligible for reuse). */ @@ -523,11 +574,8 @@ //defensive: n = -1; parent.Null(); - massert( 10284 , "todo: use RecStoreInterface instead", false); - // TODO: this was broken anyway as deleteRecord does unindexRecord() call which assumes the data is a BSONObj, - // and it isn't. - assert(false); -// theDataFileMgr.deleteRecord(id.indexNamespace().c_str(), thisLoc.rec(), thisLoc); + string ns = id.indexNamespace(); + btreeStore->deleteRecord(ns.c_str(), thisLoc); #endif } @@ -560,13 +608,13 @@ /* remove a key from the index */ bool BtreeBucket::unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc ) { if ( key.objsize() > KeyMax ) { - OCCASIONALLY problem() << "unindex: key too large to index, skipping " << id.indexNamespace() << /* ' ' << key.toString() << */ '\n'; + OCCASIONALLY problem() << "unindex: key too large to index, skipping " << id.indexNamespace() << /* ' ' << key.toString() << */ endl; return false; } int pos; bool found; - DiskLoc loc = locate(id, thisLoc, key, id.keyPattern(), pos, found, recordLoc, 1); + DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1); if ( found ) { loc.btree()->delKeyAtPos(loc, id, pos); return true; @@ -598,9 +646,11 @@ /* insert a key in this bucket, splitting if necessary. keypos - where to insert the key i3n range 0..n. 0=make leftmost, n=make rightmost. + NOTE this function may free some data, and as a result the value passed for keypos may + be invalid after calling insertHere() */ void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos, - DiskLoc recordLoc, const BSONObj& key, const BSONObj& order, + DiskLoc recordLoc, const BSONObj& key, const Ordering& order, DiskLoc lchild, DiskLoc rchild, IndexDetails& idx) { modified(thisLoc); @@ -667,7 +717,7 @@ int split = n / 2; if ( keypos == n ) { // see SERVER-983 - split = 0.9 * n; + split = (int) (0.9 * n); if ( split > n - 2 ) split = n - 2; } @@ -718,7 +768,8 @@ } } - truncateTo(split, order); // note this may trash splitkey.key. thus we had to promote it before finishing up here. + int newpos = keypos; + truncateTo(split, order, newpos); // note this may trash splitkey.key. thus we had to promote it before finishing up here. 
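// Editorial note (illustrative numbers, not from the source): the SERVER-983
// split heuristic above keeps buckets dense under strictly-ascending inserts.
// With n = 100 keys and keypos == n (a rightmost insert), split becomes
// (int)(0.9 * 100) = 90: the left bucket keeps 90 keys, the key at index
// 'split' is promoted to the parent, and only the few remaining keys move to
// the new right bucket. The 'split > n - 2' cap guarantees at least one key
// is left to move right, so the new bucket is never created empty.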
// add our new key, there is room now { @@ -726,7 +777,7 @@ if ( keypos <= split ) { if ( split_debug ) out() << " keypos=0); @@ -806,7 +857,7 @@ return DiskLoc(); } - DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const BSONObj &order, int& pos, bool& found, DiskLoc recordLoc, int direction) { + DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, DiskLoc recordLoc, int direction) { int p; found = find(idx, key, recordLoc, order, p, /*assertIfDup*/ false); if ( found ) { @@ -829,10 +880,133 @@ return pos == n ? DiskLoc() /*theend*/ : thisLoc; } + bool BtreeBucket::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) { + while( 1 ) { + if ( l + 1 == h ) { + keyOfs = ( direction > 0 ) ? h : l; + DiskLoc next = thisLoc.btree()->k( h ).prevChildBucket; + if ( !next.isNull() ) { + bestParent = make_pair( thisLoc, keyOfs ); + thisLoc = next; + return true; + } else { + return false; + } + } + int m = l + ( h - l ) / 2; + int cmp = customBSONCmp( thisLoc.btree()->keyNode( m ).key, keyBegin, keyBeginLen, keyEnd, order ); + if ( cmp < 0 ) { + l = m; + } else if ( cmp > 0 ) { + h = m; + } else { + if ( direction < 0 ) { + l = m; + } else { + h = m; + } + } + } + } + + // find smallest/biggest value greater-equal/less-equal than specified + // starting thisLoc + keyOfs will be strictly less than/strictly greater than keyBegin/keyBeginLen/keyEnd + // All the direction checks below allowed me to refactor the code, but possibly separate forward and reverse implementations would be more efficient + void BtreeBucket::advanceTo(const IndexDetails &id, DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction ) { + int l,h; + bool dontGoUp; + if ( direction > 0 ) { + l = keyOfs; + h = n - 1; + dontGoUp = ( customBSONCmp( keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ); + } else { + l = 0; + h = keyOfs; + dontGoUp = ( customBSONCmp( keyNode( l ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ); + } + pair< DiskLoc, int > bestParent; + if ( dontGoUp ) { + // this comparison result assures h > l + if ( !customFind( l, h, keyBegin, keyBeginLen, keyEnd, order, direction, thisLoc, keyOfs, bestParent ) ) { + return; + } + } else { + // go up parents until rightmost/leftmost node is >=/<= target or at top + while( !thisLoc.btree()->parent.isNull() ) { + thisLoc = thisLoc.btree()->parent; + if ( direction > 0 ) { + if ( customBSONCmp( thisLoc.btree()->keyNode( thisLoc.btree()->n - 1 ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ) { + break; + } + } else { + if ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ) { + break; + } + } + } + } + // go down until find smallest/biggest >=/<= target + while( 1 ) { + l = 0; + h = thisLoc.btree()->n - 1; + // leftmost/rightmost key may possibly be >=/<= search key + bool firstCheck; + if ( direction > 0 ) { + firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ); + } else { + firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ); + } + if ( firstCheck ) { + DiskLoc next; + if ( direction > 0 ) { + next 
= thisLoc.btree()->k( 0 ).prevChildBucket; + keyOfs = 0; + } else { + next = thisLoc.btree()->nextChild; + keyOfs = h; + } + if ( !next.isNull() ) { + bestParent = make_pair( thisLoc, keyOfs ); + thisLoc = next; + continue; + } else { + return; + } + } + bool secondCheck; + if ( direction > 0 ) { + secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) < 0 ); + } else { + secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) > 0 ); + } + if ( secondCheck ) { + DiskLoc next; + if ( direction > 0 ) { + next = thisLoc.btree()->nextChild; + } else { + next = thisLoc.btree()->k( 0 ).prevChildBucket; + } + if ( next.isNull() ) { + // if bestParent is null, we've hit the end and thisLoc gets set to DiskLoc() + thisLoc = bestParent.first; + keyOfs = bestParent.second; + return; + } else { + thisLoc = next; + continue; + } + } + if ( !customFind( l, h, keyBegin, keyBeginLen, keyEnd, order, direction, thisLoc, keyOfs, bestParent ) ) { + return; + } + } + } + + /* @thisLoc disk location of *this */ int BtreeBucket::_insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const BSONObj &order, bool dupsAllowed, + const BSONObj& key, const Ordering &order, bool dupsAllowed, DiskLoc lChild, DiskLoc rChild, IndexDetails& idx) { if ( key.objsize() > KeyMax ) { problem() << "ERROR: key too large len:" << key.objsize() << " max:" << KeyMax << ' ' << key.objsize() << ' ' << idx.indexNamespace() << endl; @@ -898,12 +1072,12 @@ /* todo: meaning of return code unclear clean up */ int BtreeBucket::bt_insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const BSONObj &order, bool dupsAllowed, + const BSONObj& key, const Ordering &order, bool dupsAllowed, IndexDetails& idx, bool toplevel) { if ( toplevel ) { if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << '\n'; + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << endl; return 3; } } @@ -921,7 +1095,9 @@ DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ){ int pos; bool found; - DiskLoc bucket = locate( indexdetails , indexdetails.head , key , BSONObj() , pos , found , minDiskLoc ); + /* TODO: is it really ok here that the order is a default? */ + Ordering o = Ordering::make(BSONObj()); + DiskLoc bucket = locate( indexdetails , indexdetails.head , key , o , pos , found , minDiskLoc ); if ( bucket.isNull() ) return bucket; @@ -958,7 +1134,8 @@ DiskLoc rl; BSONObj key = fromjson("{x:9}"); - BSONObj order = fromjson("{}"); + BSONObj orderObj = fromjson("{}"); + Ordering order = Ordering::make(orderObj); b->bt_insert(id.head, A, key, order, true, id); A.GETOFS() += 2; @@ -974,7 +1151,7 @@ b->k(2).setUnused(); b->k(3).setUnused(); - b->dumpTree(id.head, order); + b->dumpTree(id.head, orderObj); /* b->bt_insert(id.head, B, key, order, false, id); b->k(1).setUnused(); @@ -989,17 +1166,20 @@ // this should assert. does it? 
(it might "accidentally" though, not asserting proves a problem, asserting proves nothing) b->bt_insert(id.head, C, key, order, false, id); - b->dumpTree(id.head, order); +// b->dumpTree(id.head, order); } /* --- BtreeBuilder --- */ BtreeBuilder::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : - dupsAllowed(_dupsAllowed), idx(_idx), n(0) + dupsAllowed(_dupsAllowed), + idx(_idx), + n(0), + order( idx.keyPattern() ), + ordering( Ordering::make(idx.keyPattern()) ) { first = cur = BtreeBucket::addBucket(idx); b = cur.btreemod(); - order = idx.keyPattern(); committed = false; } @@ -1023,15 +1203,15 @@ keyLast = key; } - if ( ! b->_pushBack(loc, key, order, DiskLoc()) ){ + if ( ! b->_pushBack(loc, key, ordering, DiskLoc()) ){ // no room if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << '\n'; + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << endl; } else { // bucket was full newBucket(); - b->pushBack(loc, key, order, DiskLoc()); + b->pushBack(loc, key, ordering, DiskLoc()); } } n++; @@ -1060,13 +1240,13 @@ bool keepX = ( x->n != 0 ); DiskLoc keepLoc = keepX ? xloc : x->nextChild; - if ( ! up->_pushBack(r, k, order, keepLoc) ){ + if ( ! up->_pushBack(r, k, ordering, keepLoc) ){ // current bucket full DiskLoc n = BtreeBucket::addBucket(idx); up->tempNext() = n; upLoc = n; up = upLoc.btreemod(); - up->pushBack(r, k, order, keepLoc); + up->pushBack(r, k, ordering, keepLoc); } DiskLoc nextLoc = x->tempNext(); /* get next in chain at current level */ @@ -1075,7 +1255,7 @@ } else { if ( !x->nextChild.isNull() ) x->nextChild.btreemod()->parent = upLoc; - x->deallocBucket( xloc ); + x->deallocBucket( xloc, idx ); } xloc = nextLoc; } diff -Nru mongodb-1.4.4/db/btreecursor.cpp mongodb-1.6.3/db/btreecursor.cpp --- mongodb-1.4.4/db/btreecursor.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/btreecursor.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "btree.h" #include "pdfile.h" #include "jsobj.h" @@ -35,29 +35,39 @@ multikey( d->isMultikey( idxNo ) ), indexDetails( _id ), order( _id.keyPattern() ), + _ordering( Ordering::make( order ) ), direction( _direction ), - boundIndex_(), - _spec( _id.getSpec() ) + _spec( _id.getSpec() ), + _independentFieldRanges( false ) { audit(); init(); + DEV assert( dups.size() == 0 ); } - BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const vector< pair< BSONObj, BSONObj > > &_bounds, int _direction ) + BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ) : d(_d), idxNo(_idxNo), endKeyInclusive_( true ), multikey( d->isMultikey( idxNo ) ), indexDetails( _id ), order( _id.keyPattern() ), + _ordering( Ordering::make( order ) ), direction( _direction ), - bounds_( _bounds ), - boundIndex_(), - _spec( _id.getSpec() ) + bounds_( ( assert( _bounds.get() ), _bounds ) ), + _boundsIterator( new FieldRangeVector::Iterator( *bounds_ ) ), + _spec( _id.getSpec() ), + _independentFieldRanges( true ) { - assert( !bounds_.empty() ); + massert( 13384, "BtreeCursor FieldRangeVector constructor doesn't accept special indexes", !_spec.getType() ); audit(); - initInterval(); + startKey = bounds_->startKey(); + bool found; + _boundsIterator->advance( startKey ); // handles initialization + bucket = indexDetails.head.btree()-> + locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction); + skipAndCheck(); + DEV assert( dups.size() == 0 ); } void BtreeCursor::audit() { @@ -82,21 +92,41 @@ } bool found; bucket = indexDetails.head.btree()-> - locate(indexDetails, indexDetails.head, startKey, order, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction); - skipUnusedKeys(); - checkEnd(); + locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction); + skipUnusedKeys( false ); + checkEnd(); } - void BtreeCursor::initInterval() { - do { - startKey = bounds_[ boundIndex_ ].first; - endKey = bounds_[ boundIndex_ ].second; - init(); - } while ( !ok() && ++boundIndex_ < bounds_.size() ); + void BtreeCursor::skipAndCheck() { + skipUnusedKeys( true ); + while( 1 ) { + if ( !skipOutOfRangeKeysAndCheckEnd() ) { + break; + } + while( skipOutOfRangeKeysAndCheckEnd() ); + if ( !skipUnusedKeys( true ) ) { + break; + } + } } - + + bool BtreeCursor::skipOutOfRangeKeysAndCheckEnd() { + if ( !ok() ) { + return false; + } + int ret = _boundsIterator->advance( currKeyNode().key ); + if ( ret == -2 ) { + bucket = DiskLoc(); + return false; + } else if ( ret == -1 ) { + return false; + } + advanceTo( currKeyNode().key, ret, _boundsIterator->cmp() ); + return true; + } + /* skip unused keys. */ - void BtreeCursor::skipUnusedKeys() { + bool BtreeCursor::skipUnusedKeys( bool mayJump ) { int u = 0; while ( 1 ) { if ( !ok() ) @@ -107,12 +137,16 @@ break; bucket = b->advance(bucket, keyOfs, direction, "skipUnusedKeys"); u++; + if ( mayJump && ( u % 10 == 0 ) ) { + skipOutOfRangeKeysAndCheckEnd(); + } } if ( u > 10 ) OCCASIONALLY log() << "btree unused skipped:" << u << '\n'; + return u; } -// Return a value in the set {-1, 0, 1} to represent the sign of parameter i. + // Return a value in the set {-1, 0, 1} to represent the sign of parameter i. 
int sgn( int i ) { if ( i == 0 ) return 0; @@ -130,17 +164,26 @@ bucket = DiskLoc(); } } - + + void BtreeCursor::advanceTo( const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd) { + bucket.btree()->advanceTo( indexDetails, bucket, keyOfs, keyBegin, keyBeginLen, keyEnd, _ordering, direction ); + } + bool BtreeCursor::advance() { killCurrentOp.checkForInterrupt(); if ( bucket.isNull() ) return false; + bucket = bucket.btree()->advance(bucket, keyOfs, direction, "BtreeCursor::advance"); - skipUnusedKeys(); - checkEnd(); - if( !ok() && ++boundIndex_ < bounds_.size() ) - initInterval(); - return !bucket.isNull(); + + if ( !_independentFieldRanges ) { + skipUnusedKeys( false ); + checkEnd(); + return ok(); + } + + skipAndCheck(); + return ok(); } void BtreeCursor::noteLocation() { @@ -173,15 +216,25 @@ // Note keyAt() returns an empty BSONObj if keyOfs is now out of range, // which is possible as keys may have been deleted. - if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && + int x = 0; + while( 1 ) { + if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && b->k(keyOfs).recordLoc == locAtKeyOfs ) { - if ( !b->k(keyOfs).isUsed() ) { - /* we were deleted but still exist as an unused - marker key. advance. - */ - skipUnusedKeys(); + if ( !b->k(keyOfs).isUsed() ) { + /* we were deleted but still exist as an unused + marker key. advance. + */ + skipUnusedKeys( false ); + } + return; } - return; + + /* we check one key earlier too, in case a key was just deleted. this is + important so that multi updates are reasonably fast. + */ + if( keyOfs == 0 || x++ ) + break; + keyOfs--; } } @@ -192,10 +245,10 @@ bool found; /* TODO: Switch to keep indexdetails and do idx.head! */ - bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, order, keyOfs, found, locAtKeyOfs, direction); + bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, _ordering, keyOfs, found, locAtKeyOfs, direction); RARELY log() << " key seems to have moved in the index, refinding. found:" << found << endl; if ( ! 
bucket.isNull() ) - skipUnusedKeys(); + skipUnusedKeys( false ); } diff -Nru mongodb-1.4.4/db/btree.h mongodb-1.6.3/db/btree.h --- mongodb-1.4.4/db/btree.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/btree.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "diskloc.h" #include "pdfile.h" @@ -26,7 +26,6 @@ namespace mongo { #pragma pack(1) - struct _KeyNode { DiskLoc prevChildBucket; // the lchild DiskLoc recordLoc; // location of the record associated with the key @@ -60,7 +59,6 @@ return !isUnused(); } }; - #pragma pack() class BucketBasics; @@ -75,7 +73,6 @@ }; #pragma pack(1) - /* this class is all about the storage management */ class BucketBasics { friend class BtreeBuilder; @@ -83,8 +80,11 @@ public: void dumpTree(DiskLoc thisLoc, const BSONObj &order); bool isHead() { return parent.isNull(); } - void assertValid(const BSONObj &order, bool force = false); - int fullValidate(const DiskLoc& thisLoc, const BSONObj &order); /* traverses everything */ + void assertValid(const Ordering &order, bool force = false); + void assertValid(const BSONObj &orderObj, bool force = false) { + return assertValid(Ordering::make(orderObj),force); + } + int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0); /* traverses everything */ KeyNode keyNode(int i) const { if ( i >= n ){ @@ -106,13 +106,13 @@ /* returns false if node is full and must be split keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one. */ - bool basicInsert(const DiskLoc& thisLoc, int keypos, const DiskLoc& recordLoc, const BSONObj& key, const BSONObj &order); + bool basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order); /** * @return true if works, false if not enough space */ - bool _pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild); - void pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild){ + bool _pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild); + void pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild){ bool ok = _pushBack( recordLoc , key , order , prevChild ); assert(ok); } @@ -130,12 +130,12 @@ } int totalDataSize() const; - void pack( const BSONObj &order ); + void pack( const Ordering &order, int &refPos); void setNotPacked(); void setPacked(); int _alloc(int bytes); void _unalloc(int bytes); - void truncateTo(int N, const BSONObj &order); + void truncateTo(int N, const Ordering &order, int &refPos); void markUnused(int keypos); /* BtreeBuilder uses the parent var as a temp place to maintain a linked list chain. @@ -152,7 +152,7 @@ ss << " n: " << n << endl; ss << " parent: " << parent.toString() << endl; ss << " nextChild: " << parent.toString() << endl; - ss << " Size: " << _Size << " flags:" << flags << endl; + ss << " flags:" << flags << endl; ss << " emptySize: " << emptySize << " topSize: " << topSize << endl; return ss.str(); } @@ -164,7 +164,12 @@ protected: void _shape(int level, stringstream&); DiskLoc nextChild; // child bucket off and to the right of the highest key. - int _Size; // total size of this btree node in bytes. constant. 
+ + private: + unsigned short _wasSize; // can be reused, value is 8192 in current pdfile version Apr2010 + unsigned short _reserved1; // zero + + protected: int Size() const; int flags; int emptySize; // size of the empty region @@ -179,7 +184,9 @@ } char data[4]; }; +#pragma pack() +#pragma pack(1) class BtreeBucket : public BucketBasics { friend class BtreeCursor; public: @@ -191,20 +198,20 @@ BSONObj order = ((IndexDetails&)idx).keyPattern(); likewise below in bt_insert() etc. */ - bool exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, BSONObj order); + bool exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, const Ordering& order); bool wouldCreateDup( const IndexDetails& idx, DiskLoc thisLoc, - const BSONObj& key, BSONObj order, + const BSONObj& key, const Ordering& order, DiskLoc self); static DiskLoc addBucket(IndexDetails&); /* start a new index off, empty */ - void deallocBucket(const DiskLoc &thisLoc); // clear bucket memory, placeholder for deallocation + void deallocBucket(const DiskLoc &thisLoc, IndexDetails &id); static void renameIndexNamespace(const char *oldNs, const char *newNs); int bt_insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const BSONObj &order, bool dupsAllowed, + const BSONObj& key, const Ordering &order, bool dupsAllowed, IndexDetails& idx, bool toplevel = true); bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc); @@ -215,7 +222,7 @@ found - returns true if exact match found. note you can get back a position result even if found is false. */ - DiskLoc locate(const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key, const BSONObj &order, + DiskLoc locate(const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, DiskLoc recordLoc, int direction=1); /** @@ -227,6 +234,9 @@ /* advance one key position in the index: */ DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller); + + void advanceTo(const IndexDetails &id, DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction ); + DiskLoc getHead(const DiskLoc& thisLoc); /* get tree shape */ @@ -243,24 +253,28 @@ } static BtreeBucket* allocTemp(); /* caller must release with free() */ void insertHere(DiskLoc thisLoc, int keypos, - DiskLoc recordLoc, const BSONObj& key, const BSONObj &order, + DiskLoc recordLoc, const BSONObj& key, const Ordering &order, DiskLoc lchild, DiskLoc rchild, IndexDetails&); int _insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const BSONObj &order, bool dupsAllowed, + const BSONObj& key, const Ordering &order, bool dupsAllowed, DiskLoc lChild, DiskLoc rChild, IndexDetails&); - bool find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const BSONObj &order, int& pos, bool assertIfDup); + bool find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const Ordering &order, int& pos, bool assertIfDup); + bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ); static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey); + static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, const vector< const BSONElement * > &rEnd, const Ordering &o ); public: // 
simply builds and returns a dup key error message string static string dupKeyError( const IndexDetails& idx , const BSONObj& key ); }; +#pragma pack() class BtreeCursor : public Cursor { public: BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction ); - BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const BoundList &_bounds, int _direction ); - + BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ); + ~BtreeCursor(){ + } virtual bool ok() { return !bucket.isNull(); } @@ -272,6 +286,7 @@ virtual void noteLocation(); // updates keyAtKeyOfs... virtual void checkLocation(); virtual bool supportGetMore() { return true; } + virtual bool supportYields() { return true; } /* used for multikey index traversal to avoid sending back dups. see Matcher::matches(). if a multikey index traversal: @@ -279,7 +294,6 @@ otherwise, marks loc as sent. @return true if the loc has not been seen */ - set<DiskLoc> dups; virtual bool getsetdup(DiskLoc loc) { if( multikey ) { pair<set<DiskLoc>::iterator, bool> p = dups.insert(loc); @@ -326,7 +340,7 @@ virtual string toString() { string s = string("BtreeCursor ") + indexDetails.indexName(); if ( direction < 0 ) s += " reverse"; - if ( bounds_.size() > 1 ) s += " multi"; + if ( bounds_.get() && bounds_->size() > 1 ) s += " multi"; return s; } @@ -335,26 +349,31 @@ } virtual BSONObj prettyIndexBounds() const { - BSONArrayBuilder ba; - if ( bounds_.size() == 0 ) { - ba << BSON_ARRAY( prettyKey( startKey ) << prettyKey( endKey ) ); + if ( !_independentFieldRanges ) { + return BSON( "start" << prettyKey( startKey ) << "end" << prettyKey( endKey ) ); } else { - for( BoundList::const_iterator i = bounds_.begin(); i != bounds_.end(); ++i ) { - ba << BSON_ARRAY( prettyKey( i->first ) << prettyKey( i->second ) ); - } + return bounds_->obj(); } - return ba.arr(); } void forgetEndKey() { endKey = BSONObj(); } + virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } + + virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { + _matcher = matcher; + } + + // for debugging only + DiskLoc getBucket() const { return bucket; } + private: /* Our btrees may (rarely) have "unused" keys when items are deleted. Skip past them. */ - void skipUnusedKeys(); - - /* Check if the current key is beyond endKey. */ + bool skipUnusedKeys( bool mayJump ); + bool skipOutOfRangeKeysAndCheckEnd(); + void skipAndCheck(); void checkEnd(); // selective audits on construction @@ -363,36 +382,40 @@ // set initial bucket void init(); - // init start / end keys with a new range - void initInterval(); - + void advanceTo( const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd); + friend class BtreeBucket; + set<DiskLoc> dups; NamespaceDetails *d; int idxNo; + BSONObj startKey; BSONObj endKey; bool endKeyInclusive_; + bool multikey; // note this must be updated every getmore batch in case someone added a multikey...
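Throughout these hunks the cursor now carries both the raw key pattern (order) and a precomputed Ordering (_ordering, declared just below). A sketch of what such a precomputation plausibly does — packing each key field's direction into one bit so comparisons avoid re-iterating the BSON key pattern. The 32-field cap and the member layout are assumptions of this sketch, not the project's verbatim code:

    // Illustrative sketch only: one bit per compound-key field, set when that
    // field sorts descending.
    class OrderingSketch {
        unsigned bits;
        OrderingSketch(unsigned b) : bits(b) {}
    public:
        // +1 for ascending, -1 for descending on the i-th key field
        int get(int i) const { return ((1u << i) & bits) ? -1 : 1; }
        static OrderingSketch make(const BSONObj& keyPattern) {
            unsigned b = 0;
            int n = 0;
            BSONObjIterator it(keyPattern);
            while (it.more() && n < 32) {
                if (it.next().number() < 0)   // e.g. { a : 1, b : -1 } sets bit 1
                    b |= (1u << n);
                n++;
            }
            return OrderingSketch(b);
        }
    };
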
const IndexDetails& indexDetails; BSONObj order; + Ordering _ordering; DiskLoc bucket; int keyOfs; int direction; // 1=fwd,-1=reverse BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call DiskLoc locAtKeyOfs; - BoundList bounds_; - unsigned boundIndex_; + shared_ptr< FieldRangeVector > bounds_; + auto_ptr< FieldRangeVector::Iterator > _boundsIterator; const IndexSpec& _spec; + shared_ptr< CoveredIndexMatcher > _matcher; + bool _independentFieldRanges; }; -#pragma pack() inline bool IndexDetails::hasKey(const BSONObj& key) { - return head.btree()->exists(*this, head, key, keyPattern()); + return head.btree()->exists(*this, head, key, Ordering::make(keyPattern())); } inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) { - return head.btree()->wouldCreateDup(*this, head, key, keyPattern(), self); + return head.btree()->wouldCreateDup(*this, head, key, Ordering::make(keyPattern()), self); } /* build btree from the bottom up */ @@ -403,6 +426,7 @@ unsigned long long n; BSONObj keyLast; BSONObj order; + Ordering ordering; bool committed; DiskLoc cur, first; diff -Nru mongodb-1.4.4/db/cap.cpp mongodb-1.6.3/db/cap.cpp --- mongodb-1.4.4/db/cap.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/cap.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,393 @@ +// @file cap.cpp capped collection related + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "pdfile.h" +#include "db.h" +#include "../util/mmap.h" +#include "../util/hashtab.h" +#include "../scripting/engine.h" +#include "btree.h" +#include <algorithm> +#include <list> +#include "query.h" +#include "queryutil.h" +#include "json.h" + +/* + capped collection layout + + d's below won't exist if things align perfectly: + + extent1 -> extent2 -> extent3 + ------------------- ----------------------- --------------------- + d r r r r r r r r d d r r r r d r r r r r d d r r r r r r r r r d + ^ ^ + oldest newest + + ^cappedFirstDeletedInCurExtent() + ^cappedLastDelRecLastExtent() + ^cappedListOfAllDeletedRecords() +*/ + + +namespace mongo { + + /* combine adjacent deleted records *for the current extent* of the capped collection + + this is O(n^2) but we call it for capped tables where typically n==1 or 2! + (or 3...there will be a little unused sliver at the end of the extent.) + */ + void NamespaceDetails::compact() { + assert(capped); + + list<DiskLoc> drecs; + + // Pull out capExtent's DRs from deletedList + DiskLoc i = cappedFirstDeletedInCurExtent(); + for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted ) + drecs.push_back( i ); + cappedFirstDeletedInCurExtent() = i; + + // This is the O(n^2) part.
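Before the merge loop that follows, a standalone sketch of the adjacency test it relies on may help (simplified stand-ins for DiskLoc/DeletedRecord; names are hypothetical): two deleted records coalesce only when they sit in the same data file and the first ends exactly where the second begins. That is also why drecs is sorted first — after sorting, merge candidates are always neighbors, so a single pass suffices.

    struct RecSketch { int fileNo; int ofs; int len; };   // stand-in for a DeletedRecord

    // mirrors the a.a() == b.a() && a.getOfs() + lengthWithHeaders == b.getOfs()
    // test in compact() below
    bool adjacent(const RecSketch& a, const RecSketch& b) {
        return a.fileNo == b.fileNo && a.ofs + a.len == b.ofs;
    }
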
+ drecs.sort(); + + list<DiskLoc>::iterator j = drecs.begin(); + assert( j != drecs.end() ); + DiskLoc a = *j; + while ( 1 ) { + j++; + if ( j == drecs.end() ) { + DEBUGGING out() << "TEMP: compact adddelrec\n"; + addDeletedRec(a.drec(), a); + break; + } + DiskLoc b = *j; + while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) { + // a & b are adjacent. merge. + a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders; + j++; + if ( j == drecs.end() ) { + DEBUGGING out() << "temp: compact adddelrec2\n"; + addDeletedRec(a.drec(), a); + return; + } + b = *j; + } + DEBUGGING out() << "temp: compact adddelrec3\n"; + addDeletedRec(a.drec(), a); + a = b; + } + } + + DiskLoc &NamespaceDetails::cappedFirstDeletedInCurExtent() { + if ( cappedLastDelRecLastExtent().isNull() ) + return cappedListOfAllDeletedRecords(); + else + return cappedLastDelRecLastExtent().drec()->nextDeleted; + } + + void NamespaceDetails::cappedCheckMigrate() { + // migrate old NamespaceDetails format + assert( capped ); + if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) { + capFirstNewRecord = DiskLoc(); + capFirstNewRecord.setInvalid(); + // put all the DeletedRecords in cappedListOfAllDeletedRecords() + for ( int i = 1; i < Buckets; ++i ) { + DiskLoc first = deletedList[ i ]; + if ( first.isNull() ) + continue; + DiskLoc last = first; + for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted ); + last.drec()->nextDeleted = cappedListOfAllDeletedRecords(); + cappedListOfAllDeletedRecords() = first; + deletedList[ i ] = DiskLoc(); + } + // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above + + // Last, in case we're killed before getting here + capExtent = firstExtent; + } + } + + bool NamespaceDetails::inCapExtent( const DiskLoc &dl ) const { + assert( !dl.isNull() ); + // We could have a rec or drec, doesn't matter. + return dl.drec()->myExtent( dl ) == capExtent.ext(); + } + + bool NamespaceDetails::nextIsInCapExtent( const DiskLoc &dl ) const { + assert( !dl.isNull() ); + DiskLoc next = dl.drec()->nextDeleted; + if ( next.isNull() ) + return false; + return inCapExtent( next ); + } + + void NamespaceDetails::advanceCapExtent( const char *ns ) { + // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent + // (or DiskLoc() if new capExtent == firstExtent) + if ( capExtent == lastExtent ) + cappedLastDelRecLastExtent() = DiskLoc(); + else { + DiskLoc i = cappedFirstDeletedInCurExtent(); + for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted ); + cappedLastDelRecLastExtent() = i; + } + + capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; + + /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */ + //dassert( theCapExtent()->ns == ns ); + + theCapExtent()->assertOk(); + capFirstNewRecord = DiskLoc(); + } + + DiskLoc NamespaceDetails::__capAlloc( int len ) { + DiskLoc prev = cappedLastDelRecLastExtent(); + DiskLoc i = cappedFirstDeletedInCurExtent(); + DiskLoc ret; + for (; !i.isNull() && inCapExtent( i ); prev = i, i = i.drec()->nextDeleted ) { + // We need to keep at least one DR per extent in cappedListOfAllDeletedRecords(), + // so make sure there's space to create a DR at the end.
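One reading of the len + 24 headroom test just below — an inference, not something the patch documents: assuming the on-disk DeletedRecord header is 16 bytes (int lengthWithHeaders, int extentOfs, 8-byte DiskLoc nextDeleted), carving len bytes out of a candidate must leave at least a header plus a few payload bytes behind, so the remainder can stand as the extent's surviving DeletedRecord.

    // Assumed sizes; the 16-byte DeletedRecord header is inferred from pdfile.h.
    const int kDelRecHeaderBytes  = 16;
    const int kMinLeftoverPayload = 8;

    bool canCarveFrom(int candidateLengthWithHeaders, int len) {
        // equivalent to: candidateLengthWithHeaders >= len + 24
        return candidateLengthWithHeaders >= len + kDelRecHeaderBytes + kMinLeftoverPayload;
    }
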
+ if ( i.drec()->lengthWithHeaders >= len + 24 ) { + ret = i; + break; + } + } + + /* unlink ourself from the deleted list */ + if ( !ret.isNull() ) { + if ( prev.isNull() ) + cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted; + else + prev.drec()->nextDeleted = ret.drec()->nextDeleted; + ret.drec()->nextDeleted.setInvalid(); // defensive. + assert( ret.drec()->extentOfs < ret.getOfs() ); + } + + return ret; + } + + DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { + // signal done allocating new extents. + if ( !cappedLastDelRecLastExtent().isValid() ) + cappedLastDelRecLastExtent() = DiskLoc(); + + assert( len < 400000000 ); + int passes = 0; + int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog + if ( maxPasses < 5000 ){ + // this is for backwards safety since 5000 was the old value + maxPasses = 5000; + } + DiskLoc loc; + + // delete records until we have room and the max # objects limit achieved. + + /* this fails on a rename -- that is ok but must keep commented out */ + //assert( theCapExtent()->ns == ns ); + + theCapExtent()->assertOk(); + DiskLoc firstEmptyExtent; + while ( 1 ) { + if ( nrecords < max ) { + loc = __capAlloc( len ); + if ( !loc.isNull() ) + break; + } + + // If on first iteration through extents, don't delete anything. + if ( !capFirstNewRecord.isValid() ) { + advanceCapExtent( ns ); + if ( capExtent != firstExtent ) + capFirstNewRecord.setInvalid(); + // else signal done with first iteration through extents. + continue; + } + + if ( !capFirstNewRecord.isNull() && + theCapExtent()->firstRecord == capFirstNewRecord ) { + // We've deleted all records that were allocated on the previous + // iteration through this extent. + advanceCapExtent( ns ); + continue; + } + + if ( theCapExtent()->firstRecord.isNull() ) { + if ( firstEmptyExtent.isNull() ) + firstEmptyExtent = capExtent; + advanceCapExtent( ns ); + if ( firstEmptyExtent == capExtent ) { + maybeComplain( ns, len ); + return DiskLoc(); + } + continue; + } + + DiskLoc fr = theCapExtent()->firstRecord; + theDataFileMgr.deleteRecord(ns, fr.rec(), fr, true); // ZZZZZZZZZZZZ + compact(); + if( ++passes > maxPasses ) { + log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n'; + log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl; + massert( 10345 , "passes >= maxPasses in capped collection alloc", false ); + } + } + + // Remember first record allocated on this iteration through capExtent. + if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() ) + capFirstNewRecord = loc; + + return loc; + } + + void NamespaceDetails::dumpExtents() { + cout << "dumpExtents:" << endl; + for ( DiskLoc i = firstExtent; !i.isNull(); i = i.ext()->xnext ) { + Extent *e = i.ext(); + stringstream ss; + e->dump(ss); + cout << ss.str() << endl; + } + } + + void NamespaceDetails::cappedDumpDelInfo() { + cout << "dl[0]: " << deletedList[0].toString() << endl; + for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { + cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << + " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; + } + cout << "dl[1]: " << deletedList[1].toString() << endl; + } + + /* everything from end on, eliminate from the capped collection. + @param inclusive if true, deletes end (i.e.
closed or open range) + */ + void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) { + DEV assert( this == nsdetails(ns) ); + assert( cappedLastDelRecLastExtent().isValid() ); + + bool foundLast = false; + while( 1 ) { + if ( foundLast ) { + break; + } + DiskLoc curr = theCapExtent()->lastRecord; + assert( !curr.isNull() ); + if ( curr == end ) { + if ( inclusive ) { + foundLast = true; + } else { + break; + } + } + + uassert( 13415, "emptying the collection is not allowed", nrecords > 1 ); + + if ( !capLooped() ) { + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + if ( theCapExtent()->lastRecord.isNull() ) { + assert( !theCapExtent()->xprev.isNull() ); + capExtent = theCapExtent()->xprev; + theCapExtent()->assertOk(); + if ( capExtent == firstExtent ) { + cappedLastDelRecLastExtent() = DiskLoc(); + } else { + // slow - there's no prev ptr for deleted rec + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent + cappedLastDelRecLastExtent() = i; + } + } + continue; + } + + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + if ( curr == capFirstNewRecord ) { // invalid, but can compare locations + capExtent = ( capExtent == firstExtent ) ? lastExtent : theCapExtent()->xprev; + theCapExtent()->assertOk(); + assert( !theCapExtent()->firstRecord.isNull() ); + capFirstNewRecord = theCapExtent()->firstRecord; + if ( capExtent == firstExtent ) { + cappedLastDelRecLastExtent() = DiskLoc(); + } else { + // slow - there's no prev ptr for deleted rec + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent + cappedLastDelRecLastExtent() = i; + } + } + } + } + + void NamespaceDetails::emptyCappedCollection( const char *ns ) { + DEV assert( this == nsdetails(ns) ); + massert( 13424, "collection must be capped", capped ); + massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress ); + massert( 13426, "indexes present", nIndexes == 0 ); + + ClientCursor::invalidate( ns ); + NamespaceDetailsTransient::clearForPrefix( ns ); + + cappedLastDelRecLastExtent() = DiskLoc(); + cappedListOfAllDeletedRecords() = DiskLoc(); + + // preserve firstExtent/lastExtent + capExtent = firstExtent; + datasize = nrecords = 0; + // lastExtentSize preserve + // nIndexes preserve 0 + // capped preserve true + // max preserve + paddingFactor = 1.0; + flags = 0; + capFirstNewRecord = DiskLoc(); + capFirstNewRecord.setInvalid(); + cappedLastDelRecLastExtent().setInvalid(); + // dataFileVersion preserve + // indexFileVersion preserve + multiKeyIndexBits = 0; + reservedA = 0; + extraOffset = 0; + // backgroundIndexBuildInProgress preserve 0 + memset(reserved, 0, sizeof(reserved)); + + for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) { + DiskLoc prev = ext.ext()->xprev; + DiskLoc next = ext.ext()->xnext; + DiskLoc empty = ext.ext()->reuse( ns ); + ext.ext()->xprev = prev; + ext.ext()->xnext = next; + addDeletedRec( empty.drec(), empty ); + } + } + +} diff -Nru mongodb-1.4.4/db/client.cpp mongodb-1.6.3/db/client.cpp --- mongodb-1.4.4/db/client.cpp 2010-06-30 00:03:29.000000000 -0700 +++ 
mongodb-1.6.3/db/client.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -20,24 +20,33 @@ to an open socket (or logical connection if pooling on sockets) from a client. */ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "client.h" #include "curop.h" #include "json.h" #include "security.h" +#include "commands.h" +#include "instance.h" +#include "../s/d_logic.h" +#include "dbwebserver.h" +#include "../util/mongoutils/html.h" +#include "../util/mongoutils/checksum.h" namespace mongo { - mongo::mutex Client::clientsMutex; + Client* Client::syncThread; + mongo::mutex Client::clientsMutex("clientsMutex"); set<Client*> Client::clients; // always be in clientsMutex when manipulating this boost::thread_specific_ptr<Client> currentClient; - Client::Client(const char *desc) : + Client::Client(const char *desc, MessagingPort *p) : _context(0), _shutdown(false), _desc(desc), - _god(0) + _god(0), + _lastOp(0), + _mp(p) { _curOp = new CurOp( this ); scoped_lock bl(clientsMutex); @@ -45,13 +54,69 @@ } Client::~Client() { - delete _curOp; _god = 0; if ( _context ) - cout << "ERROR: Client::~Client _context should be NULL: " << _desc << endl; - if ( !_shutdown ) - cout << "ERROR: Client::shutdown not called: " << _desc << endl; + error() << "Client::~Client _context should be null but is not; client:" << _desc << endl; + + if ( ! _shutdown ) { + error() << "Client::shutdown not called: " << _desc << endl; + } + + scoped_lock bl(clientsMutex); + if ( ! _shutdown ) + clients.erase(this); + delete _curOp; + } + + void Client::_dropns( const string& ns ){ + Top::global.collectionDropped( ns ); + + dblock l; + Client::Context ctx( ns ); + if ( ! nsdetails( ns.c_str() ) ) + return; + + try { + string err; + BSONObjBuilder b; + dropCollection( ns , err , b ); + } + catch ( ... ){ + warning() << "error dropping temp collection: " << ns << endl; + } + + } + + void Client::_invalidateDB( const string& db ) { + assert( db.find( '.' ) == string::npos ); + + set<string>::iterator min = _tempCollections.lower_bound( db + "." ); + set<string>::iterator max = _tempCollections.lower_bound( db + "|" ); + + _tempCollections.erase( min , max ); + + } + + void Client::invalidateDB(const string& db) { + scoped_lock bl(clientsMutex); + for ( set<Client*>::iterator i = clients.begin(); i!=clients.end(); i++ ){ + Client* cli = *i; + cli->_invalidateDB(db); + } + } + + void Client::invalidateNS( const string& ns ){ + scoped_lock bl(clientsMutex); + for ( set<Client*>::iterator i = clients.begin(); i!=clients.end(); i++ ){ + Client* cli = *i; + cli->_tempCollections.erase( ns ); + } + } + + + void Client::addTempCollection( const string& ns ) { + _tempCollections.insert( ns ); } bool Client::shutdown(){ @@ -67,22 +132,8 @@ if ( _tempCollections.size() ){ didAnything = true; - for ( list<string>::iterator i = _tempCollections.begin(); i!=_tempCollections.end(); i++ ){ - string ns = *i; - Top::global.collectionDropped( ns ); - - dblock l; - Client::Context ctx( ns ); - if ( ! nsdetails( ns.c_str() ) ) - continue; - try { - string err; - BSONObjBuilder b; - dropCollection( ns , err , b ); - } - catch ( ...
){ - log() << "error dropping temp collection: " << ns << endl; - } + for ( set<string>::iterator i = _tempCollections.begin(); i!=_tempCollections.end(); i++ ){ + _dropns( *i ); } _tempCollections.clear(); } @@ -152,8 +203,21 @@ _client->_curOp->enter( this ); if ( doauth ) _auth( lockState ); - } + switch ( _client->_curOp->getOp() ){ + case dbGetMore: // getMore's are special and should be handled elsewhere + case dbUpdate: // update & delete check shard version in instance.cpp, so don't check here as well + case dbDelete: + break; + default: { + string errmsg; + if ( ! shardVersionOk( _ns , lockState > 0 , errmsg ) ){ + msgasserted( StaleConfigInContextCode , (string)"[" + _ns + "] shard version not ok in Client::Context: " + errmsg ); + } + } + } + } + void Client::Context::_auth( int lockState ){ if ( _client->_ai.isAuthorizedForLock( _db->name , lockState ) ) return; @@ -162,8 +226,8 @@ _client->_context = _oldContext; // note: _oldContext may be null stringstream ss; - ss << "unauthorized for db [" << _db->name << "] lock type: " << lockState << endl; - massert( 10057 , ss.str() , 0 ); + ss << "unauthorized db:" << _db->name << " lock type:" << lockState << " client:" << _client->clientAddress(); + uasserted( 10057 , ss.str() ); } Client::Context::~Context() { @@ -172,6 +236,12 @@ _client->_context = _oldContext; // note: _oldContext may be null } + string Client::clientAddress() const { + if( _curOp ) + return _curOp->getRemoteString(false); + return ""; + } + string Client::toString() const { stringstream ss; if ( _curOp ) @@ -181,7 +251,7 @@ string sayClientState(){ Client* c = currentClient.get(); - if ( ! c ) + if ( !c ) return "no client"; return c->toString(); } @@ -203,7 +273,39 @@ } } - BSONObj CurOp::infoNoauth() { + CurOp::~CurOp(){ + if ( _wrapped ){ + scoped_lock bl(Client::clientsMutex); + _client->_curOp = _wrapped; + } + + _client = 0; + } + + BSONObj CurOp::query( bool threadSafe ) { + if( querySize() == 1 ) { + return _tooBig; + } + + if ( ! threadSafe ){ + BSONObj o(_queryBuf); + return o; + } + + int size = querySize(); + int before = checksum( _queryBuf , size ); + BSONObj a(_queryBuf); + BSONObj b = a.copy(); + int after = checksum( _queryBuf , size ); + + if ( before == after ) + return b; + + return BSON( "msg" << "query changed while capturing" ); + } + + + BSONObj CurOp::infoNoauth( int attempt ) { BSONObjBuilder b; b.append("opid", _opNum); bool a = _active && _start; @@ -220,12 +322,35 @@ b.append("ns", _ns); - if( haveQuery() ) { - b.append("query", query()); + { + int size = querySize(); + if ( size == 0 ){ + // do nothing + } + else if ( size == 1 ){ + b.append( "query" , _tooBig ); + } + else if ( attempt > 2 ){ + b.append( "query" , BSON( "err" << "can't get a clean object" ) ); + log( LL_WARNING ) << "CurOp changing too much to get reading" << endl; + + } + else { + int before = checksum( _queryBuf , size ); + b.appendObject( "query" , _queryBuf , size ); + int after = checksum( _queryBuf , size ); + + if ( after != before ){ + // this means something changed + // going to retry + return infoNoauth( attempt + 1 ); + } + } } + // b.append("inLock", ?? stringstream clientStr; - clientStr << inet_ntoa( _remote.sin_addr ) << ":" << ntohs( _remote.sin_port ); + clientStr << _remote.toString(); b.append("client", clientStr.str()); if ( _client ) @@ -234,32 +359,139 @@ if ( !
_message.empty() ){ if ( _progressMeter.isActive() ){ StringBuilder buf(128); - buf << _message << " " << _progressMeter.toString(); + buf << _message.toString() << " " << _progressMeter.toString(); b.append( "msg" , buf.str() ); } else { - b.append( "msg" , _message ); + b.append( "msg" , _message.toString() ); } } return b.obj(); } - int Client::recommendedYieldMicros(){ + void Client::gotHandshake( const BSONObj& o ){ + BSONObjIterator i(o); + + { + BSONElement id = i.next(); + assert( id.type() ); + _remoteId = id.wrap( "_id" ); + } + + BSONObjBuilder b; + while ( i.more() ) + b.append( i.next() ); + _handshake = b.obj(); + } + + class HandshakeCmd : public Command { + public: + void help(stringstream& h) const { h << "internal"; } + HandshakeCmd() : Command( "handshake" ){} + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return false; } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + Client& c = cc(); + c.gotHandshake( cmdObj ); + return 1; + } + + } handshakeCmd; + + class ClientListPlugin : public WebStatusPlugin { + public: + ClientListPlugin() : WebStatusPlugin( "clients" , 20 ){} + virtual void init(){} + + virtual void run( stringstream& ss ){ + using namespace mongoutils::html; + + ss << "<table border=1 cellpadding=2 cellspacing=0>\n"; + ss << "<tr align='left'>" + << th( a("", "Connections to the database, both internal and external.", "Client") ) + << th( a("http://www.mongodb.org/display/DOCS/Viewing+and+Terminating+Current+Operation", "", "OpId") ) + << "<th>Active</th>" + << "<th>LockType</th>" + << "<th>Waiting</th>" + << "<th>SecsRunning</th>" + << "<th>Op</th>" + << th( a("http://www.mongodb.org/display/DOCS/Developer+FAQ#DeveloperFAQ-What%27sa%22namespace%22%3F", "", "Namespace") ) + << "<th>Query</th>" + << "<th>client</th>" + << "<th>msg</th>" + << "<th>progress</th>" + + << "</tr>\n"; + { + scoped_lock bl(Client::clientsMutex); + for( set<Client*>::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { + Client *c = *i; + CurOp& co = *(c->curop()); + ss << "<tr><td>" << c->desc() << "</td>"; + + tablecell( ss , co.opNum() ); + tablecell( ss , co.active() ); + { + int lt = co.getLockType(); + if( lt == -1 ) tablecell(ss, "R"); + else if( lt == 1 ) tablecell(ss, "W"); + else + tablecell( ss , lt); + } + tablecell( ss , co.isWaitingForLock() ); + if ( co.active() ) + tablecell( ss , co.elapsedSeconds() ); + else + tablecell( ss , "" ); + tablecell( ss , co.getOp() ); + tablecell( ss , co.getNS() ); + if ( co.haveQuery() ) + tablecell( ss , co.query( true ) ); + else + tablecell( ss , "" ); + tablecell( ss , co.getRemoteString() ); + + tablecell( ss , co.getMessage() ); + tablecell( ss , co.getProgressMeter().toString() ); + + + ss << "</tr>\n"; + } + } + ss << "</table>\n"; + + } + + } clientListPlugin; + + int Client::recommendedYieldMicros( int * writers , int * readers ){ int num = 0; + int w = 0; + int r = 0; { scoped_lock bl(clientsMutex); - num = clients.size(); + for ( set<Client*>::iterator i=clients.begin(); i!=clients.end(); ++i ){ + Client* c = *i; + if ( c->curop()->isWaitingForLock() ){ + num++; + if ( c->curop()->getLockType() > 0 ) + w++; + else + r++; + } + } } - if ( --num <= 0 ) // -- is for myself - return 0; - + if ( writers ) + *writers = w; + if ( readers ) + *readers = r; + if ( num > 50 ) num = 50; - num *= 100; - return num; + return num * 100; } - } diff -Nru mongodb-1.4.4/db/clientcursor.cpp mongodb-1.6.3/db/clientcursor.cpp --- mongodb-1.4.4/db/clientcursor.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/clientcursor.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -22,36 +22,47 @@ Cursor -- and its derived classes -- are our internal cursors. */ -#include "stdafx.h" +#include "pch.h" #include "query.h" #include "introspect.h" #include <time.h> #include "db.h" #include "commands.h" +#include "repl_block.h" namespace mongo { + typedef multimap<DiskLoc, ClientCursor*> CCByLoc; + CCById ClientCursor::clientCursorsById; - CCByLoc ClientCursor::byLoc; boost::recursive_mutex ClientCursor::ccmutex; + long long ClientCursor::numberTimedOut = 0; - unsigned ClientCursor::byLocSize() { + /*static*/ void ClientCursor::assertNoCursors() { recursive_scoped_lock lock(ccmutex); - return byLoc.size(); + if( clientCursorsById.size() ) { + log() << "ERROR clientcursors exist but should not at this point" << endl; + ClientCursor *cc = clientCursorsById.begin()->second; + log() << "first one: " << cc->cursorid << ' ' << cc->ns << endl; + clientCursorsById.clear(); + assert(false); + } } + void ClientCursor::setLastLoc_inlock(DiskLoc L) { if ( L == _lastLoc ) return; + CCByLoc& bl = byLoc(); if ( !_lastLoc.isNull() ) { - CCByLoc::iterator i = kv_find(byLoc, _lastLoc, this); - if ( i != byLoc.end() ) - byLoc.erase(i); + CCByLoc::iterator i = kv_find(bl, _lastLoc, this); + if ( i != bl.end() ) + bl.erase(i); } if ( !L.isNull() ) - byLoc.insert( make_pair(L, this) ); + bl.insert( make_pair(L, this) ); _lastLoc = L; } @@ -75,24 +86,52 @@ { recursive_scoped_lock lock(ccmutex); - for ( CCByLoc::iterator i = byLoc.begin(); i != byLoc.end(); ++i ) { + Database *db = cc().database(); + assert(db); + assert( str::startsWith(nsPrefix, db->name) ); + + for( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end(); ++i ) { ClientCursor *cc = i->second; - if ( strncmp(nsPrefix, cc->ns.c_str(), len) == 0 ) + if( cc->_db != db ) + continue; + if ( strncmp(nsPrefix, cc->ns.c_str(), len) == 0 ) { toDelete.push_back(i->second); + } } + /* + note : we can't iterate byloc because clientcursors may exist with a loc of null in which case + they are not in the map. perhaps they should not exist though in the future? something to + change??? + + CCByLoc& bl = db->ccByLoc; + for ( CCByLoc::iterator i = bl.begin(); i != bl.end(); ++i ) { + ClientCursor *cc = i->second; + if ( strncmp(nsPrefix, cc->ns.c_str(), len) == 0 ) { + assert( cc->_db == db ); + toDelete.push_back(i->second); + } + }*/ + for ( vector<ClientCursor*>::iterator i = toDelete.begin(); i != toDelete.end(); ++i ) delete (*i); } } + bool ClientCursor::shouldTimeout( unsigned millis ){ + _idleAgeMillis += millis; + return _idleAgeMillis > 600000 && _pinValue == 0; + } + /* called every 4 seconds.
millis is amount of idle time passed since the last call -- could be zero */ void ClientCursor::idleTimeReport(unsigned millis) { + readlock lk(""); recursive_scoped_lock lock(ccmutex); - for ( CCByLoc::iterator i = byLoc.begin(); i != byLoc.end(); ) { - CCByLoc::iterator j = i; + for ( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end(); ) { + CCById::iterator j = i; i++; if( j->second->shouldTimeout( millis ) ){ + numberTimedOut++; log(1) << "killing old cursor " << j->second->cursorid << ' ' << j->second->ns << " idle:" << j->second->idleTime() << "ms\n"; delete j->second; @@ -105,10 +144,12 @@ */ void ClientCursor::informAboutToDeleteBucket(const DiskLoc& b) { recursive_scoped_lock lock(ccmutex); - RARELY if ( byLoc.size() > 70 ) { - log() << "perf warning: byLoc.size=" << byLoc.size() << " in aboutToDeleteBucket\n"; + Database *db = cc().database(); + CCByLoc& bl = db->ccByLoc; + RARELY if ( bl.size() > 70 ) { + log() << "perf warning: byLoc.size=" << bl.size() << " in aboutToDeleteBucket\n"; } - for ( CCByLoc::iterator i = byLoc.begin(); i != byLoc.end(); i++ ) + for ( CCByLoc::iterator i = bl.begin(); i != bl.end(); i++ ) i->second->c->aboutToDeleteBucket(b); } void aboutToDeleteBucket(const DiskLoc& b) { @@ -119,8 +160,11 @@ void ClientCursor::aboutToDelete(const DiskLoc& dl) { recursive_scoped_lock lock(ccmutex); - CCByLoc::iterator j = byLoc.lower_bound(dl); - CCByLoc::iterator stop = byLoc.upper_bound(dl); + Database *db = cc().database(); + assert(db); + CCByLoc& bl = db->ccByLoc; + CCByLoc::iterator j = bl.lower_bound(dl); + CCByLoc::iterator stop = bl.upper_bound(dl); if ( j == stop ) return; @@ -128,7 +172,7 @@ while ( 1 ) { toAdvance.push_back(j->second); - WIN assert( j->first == dl ); + DEV assert( j->first == dl ); ++j; if ( j == stop ) break; @@ -138,6 +182,7 @@ for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ){ ClientCursor* cc = *i; + wassert(cc->_db == db); if ( cc->_doingDeletes ) continue; @@ -155,8 +200,10 @@ } c->advance(); if ( c->eof() ) { - // advanced to end -- delete cursor - delete cc; + // advanced to end + // leave ClientCursor in place so next getMore doesn't fail + // still need to mark new location though + cc->updateLocation(); } else { wassert( c->refLoc() != dl ); @@ -190,29 +237,67 @@ DiskLoc cl = c->refLoc(); if ( lastLoc() == cl ) { //log() << "info: lastloc==curloc " << ns << '\n'; - return; - } - { + } else { recursive_scoped_lock lock(ccmutex); setLastLoc_inlock(cl); - c->noteLocation(); } + // may be necessary for MultiCursor even when cl hasn't changed + c->noteLocation(); } - bool ClientCursor::yield() { - // need to store on the stack in case this gets deleted - CursorId id = cursorid; + int ClientCursor::yieldSuggest() { + int writers = 0; + int readers = 0; + + int micros = Client::recommendedYieldMicros( &writers , &readers ); + + if ( micros > 0 && writers == 0 && dbMutex.getState() <= 0 ){ + // we have a read lock, and only reads are coming on, so why bother unlocking + micros = 0; + } + + return micros; + } + + bool ClientCursor::yieldSometimes(){ + if ( ! _yieldSometimesTracker.ping() ) + return true; - bool doingDeletes = _doingDeletes; - _doingDeletes = false; + int micros = yieldSuggest(); + return ( micros > 0 ) ?
yield( micros ) : true; + } + void ClientCursor::staticYield( int micros ) { + { + dbtempreleasecond unlock; + if ( unlock.unlocked() ){ + if ( micros == -1 ) + micros = Client::recommendedYieldMicros(); + if ( micros > 0 ) + sleepmicros( micros ); + } + else { + log( LL_WARNING ) << "ClientCursor::yield can't unlock b/c of recursive lock" << endl; + } + } + } + + bool ClientCursor::prepareToYield( YieldData &data ) { + if ( ! c->supportYields() ) + return false; + // need to store in case 'this' gets deleted + data._id = cursorid; + + data._doingDeletes = _doingDeletes; + _doingDeletes = false; + updateLocation(); - + { /* a quick test that our temprelease is safe. - todo: make a YieldingCursor class - and then make the following code part of a unit test. - */ + todo: make a YieldingCursor class + and then make the following code part of a unit test. + */ const int test = 0; static bool inEmpty = false; if( test && !inEmpty ) { @@ -228,24 +313,42 @@ dropDatabase(ns.c_str()); } } - } - - { - dbtempreleasecond unlock; - sleepmicros( Client::recommendedYieldMicros() ); - } - - if ( ClientCursor::find( id , false ) == 0 ){ - // i was deleted + } + return true; + } + + bool ClientCursor::recoverFromYield( const YieldData &data ) { + ClientCursor *cc = ClientCursor::find( data._id , false ); + if ( cc == 0 ){ + // id was deleted return false; } + + cc->_doingDeletes = data._doingDeletes; + cc->c->checkLocation(); + return true; + } + + bool ClientCursor::yield( int micros ) { + if ( ! c->supportYields() ) + return true; + YieldData data; + prepareToYield( data ); + + staticYield( micros ); - _doingDeletes = doingDeletes; - return true; + return ClientCursor::recoverFromYield( data ); } int ctmLast = 0; // so we don't have to do find() which is a little slow very often. long long ClientCursor::allocCursorId_inlock() { + if( 0 ) { + static long long z; + ++z; + cout << "TEMP alloccursorid " << z << endl; + return z; + } + long long x; int ctm = (int) curTimeMillis(); while ( 1 ) { @@ -255,26 +358,78 @@ break; } ctmLast = ctm; - DEV out() << " alloccursorid " << x << endl; + //DEV tlog() << " alloccursorid " << x << endl; return x; } + void ClientCursor::storeOpForSlave( DiskLoc last ){ + if ( ! ( _queryOptions & QueryOption_OplogReplay )) + return; + + if ( last.isNull() ) + return; + + BSONElement e = last.obj()["ts"]; + if ( e.type() == Date || e.type() == Timestamp ) + _slaveReadTill = e._opTime(); + } + + void ClientCursor::updateSlaveLocation( CurOp& curop ){ + if ( _slaveReadTill.isNull() ) + return; + mongo::updateSlaveLocation( curop , ns.c_str() , _slaveReadTill ); + } + + + void ClientCursor::appendStats( BSONObjBuilder& result ){ + recursive_scoped_lock lock(ccmutex); + result.appendNumber("totalOpen", (int)clientCursorsById.size() ); + result.appendNumber("clientCursors_size", (int) numCursors()); + result.appendNumber("timedOut" , (int)numberTimedOut); + } + // QUESTION: Restrict to the namespace from which this command was issued? // Alternatively, make this command admin-only? 
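The counters that appendStats() publishes above can be read back through the cursorInfo command defined just below; a hypothetical driver-side probe (connection setup elided, and the variable name conn is illustrative):

    // Assumes an already connected mongo::DBClientConnection named conn.
    BSONObj res;
    if ( conn.runCommand( "admin" , BSON( "cursorInfo" << 1 ) , res ) ) {
        cout << "open cursors: " << res["totalOpen"].numberInt()
             << ", timed out so far: " << res["timedOut"].numberInt() << endl;
    }
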
class CmdCursorInfo : public Command { public: - CmdCursorInfo() : Command( "cursorInfo" ) {} - virtual bool slaveOk() { return true; } + CmdCursorInfo() : Command( "cursorInfo", true ) {} + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << " example: { cursorInfo : 1 }"; } - virtual LockType locktype(){ return NONE; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - recursive_scoped_lock lock(ClientCursor::ccmutex); - result.append("byLocation_size", unsigned( ClientCursor::byLoc.size() ) ); - result.append("clientCursors_size", unsigned( ClientCursor::clientCursorsById.size() ) ); + virtual LockType locktype() const { return NONE; } + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + ClientCursor::appendStats( result ); return true; } } cmdCursorInfo; + + void ClientCursorMonitor::run(){ + Client::initThread("clientcursormon"); + Client& client = cc(); + + unsigned old = curTimeMillis(); + + while ( ! inShutdown() ){ + unsigned now = curTimeMillis(); + ClientCursor::idleTimeReport( now - old ); + old = now; + sleepsecs(4); + } + + client.shutdown(); + } + + void ClientCursor::find( const string& ns , set<CursorId>& all ){ + recursive_scoped_lock lock(ccmutex); + + for ( CCById::iterator i=clientCursorsById.begin(); i!=clientCursorsById.end(); ++i ){ + if ( i->second->ns == ns ) + all.insert( i->first ); + } + } + + + ClientCursorMonitor clientCursorMonitor; } // namespace mongo diff -Nru mongodb-1.4.4/db/clientcursor.h mongodb-1.6.3/db/clientcursor.h --- mongodb-1.4.4/db/clientcursor.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/clientcursor.h 2010-09-24 10:02:42.000000000 -0700 @@ -24,26 +24,27 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "cursor.h" #include "jsobj.h" #include "../util/message.h" +#include "../util/background.h" #include "diskloc.h" #include "dbhelpers.h" #include "matcher.h" +#include "../client/dbclient.h" namespace mongo { typedef long long CursorId; /* passed to the client so it can send back on getMore */ class Cursor; /* internal server cursor base class */ class ClientCursor; + class ParsedQuery; /* todo: make this map be per connection. this will prevent cursor hijacking security attacks perhaps. */ typedef map<CursorId, ClientCursor*> CCById; - typedef multimap<DiskLoc, ClientCursor*> CCByLoc; - extern BSONObj id_obj; class ClientCursor { @@ -58,14 +59,16 @@ unsigned _pinValue; bool _doingDeletes; + ElapsedTracker _yieldSometimesTracker; static CCById clientCursorsById; - static CCByLoc byLoc; - static boost::recursive_mutex ccmutex; // must use this for all statics above! + static long long numberTimedOut; + static boost::recursive_mutex ccmutex; // must use this for all statics above! + static CursorId allocCursorId_inlock(); - static CursorId allocCursorId_inlock(); - public: + static void assertNoCursors(); + /* use this to assure we don't in the background time out cursor while it is under use. if you are using noTimeout() already, there is no risk anyway. Further, this mechanism guards against two getMore requests on the same cursor executing @@ -97,21 +100,61 @@ release(); } }; + + // This object assures safe and reliable cleanup of the ClientCursor. + // The implementation assumes that there will be no duplicate ids among cursors + // (which is assured if cursors must last longer than 1 second).
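Because cleanup goes through the stored CursorId rather than the raw pointer, a CleanupPointer stays safe even when the cursor is deleted out from under it by a timeout or an invalidate. A sketch of intended use (someCursor is a hypothetical ClientCursor*):

    ClientCursor::CleanupPointer ccPointer;
    ccPointer.reset( someCursor );   // remembers the CursorId, not just the pointer
    // ... work that may yield or release the db lock ...
    ccPointer.reset();               // calls ClientCursor::erase( id ), which is a
                                     // no-op if the cursor has already gone away
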
+ class CleanupPointer : boost::noncopyable { + public: + CleanupPointer() : _c( 0 ), _id( -1 ) {} + void reset( ClientCursor *c = 0 ) { + if ( c == _c ) { + return; + } + + if ( _c ) { + // be careful in case cursor was deleted by someone else + ClientCursor::erase( _id ); + } + + if ( c ) { + _c = c; + _id = c->cursorid; + } else { + _c = 0; + _id = -1; + } + } + ~CleanupPointer() { + DESTRUCTOR_GUARD ( reset(); ); + } + operator bool() { return _c; } + ClientCursor * operator-> () { return _c; } + private: + ClientCursor *_c; + CursorId _id; + }; /*const*/ CursorId cursorid; - string ns; - auto_ptr<CoveredIndexMatcher> matcher; - auto_ptr<Cursor> c; - int pos; // # objects into the cursor so far - BSONObj query; + const string ns; + const shared_ptr<Cursor> c; + int pos; // # objects into the cursor so far + const BSONObj query; // used for logging diags only; optional in constructor + const int _queryOptions; // see enum QueryOptions dbclient.h + OpTime _slaveReadTill; + Database * const _db; - ClientCursor(auto_ptr<Cursor>& _c, const char *_ns, bool okToTimeout) : + ClientCursor(int queryOptions, shared_ptr<Cursor>& _c, const string& _ns, BSONObj _query = BSONObj()) : _idleAgeMillis(0), _pinValue(0), - _doingDeletes(false), + _doingDeletes(false), _yieldSometimesTracker(128,10), ns(_ns), c(_c), - pos(0) + pos(0), query(_query), + _queryOptions(queryOptions), + _db( cc().database() ) { - if( !okToTimeout ) + assert( _db ); + assert( str::startsWith(_ns, _db->name) ); + if( queryOptions & QueryOption_NoCursorTimeout ) noTimeout(); recursive_scoped_lock lock(ccmutex); cursorid = allocCursorId_inlock(); @@ -123,6 +166,7 @@ return _lastLoc; } + shared_ptr< ParsedQuery > pq; shared_ptr< FieldMatcher > fields; // which fields query wants returned Message originalMessage; // this is effectively an auto ptr for data the matcher points to @@ -132,6 +176,8 @@ static void invalidate(const char *nsPrefix); /** + * @param microsToSleep -1 : ask client + * >=0 : sleep for that amount * do a dbtemprelease * note: caller should check matcher.docMatcher().atomic() first and not yield if atomic - * we don't do herein as this->matcher (above) is only initialized for true queries/getmore. @@ -140,7 +186,80 @@ * if false is returned, then this ClientCursor should be considered deleted - * in fact, the whole database could be gone. */ - bool yield(); + bool yield( int microsToSleep = -1 ); + + /** + * @return same as yield() + */ + bool yieldSometimes(); + + static int yieldSuggest(); + static void staticYield( int micros ); + + struct YieldData { CursorId _id; bool _doingDeletes; }; + bool prepareToYield( YieldData &data ); + static bool recoverFromYield( const YieldData &data ); + + struct YieldLock : boost::noncopyable { + explicit YieldLock( ptr<ClientCursor> cc ) + : _canYield(cc->c->supportYields()) { + if ( _canYield ){ + cc->prepareToYield( _data ); + _unlock.reset(new dbtempreleasecond()); + } + } + ~YieldLock(){ + if ( _unlock ){ + log( LL_WARNING ) << "ClientCursor::YieldLock not closed properly" << endl; + relock(); + } + } + + bool stillOk(){ + if ( ! _canYield ) + return true; + + relock(); + + return ClientCursor::recoverFromYield( _data ); + } + + void relock(){ + _unlock.reset(); + } + + private: + bool _canYield; + YieldData _data; + + scoped_ptr<dbtempreleasecond> _unlock; + + }; + + // --- some pass through helpers for Cursor --- + + BSONObj indexKeyPattern() { + return c->indexKeyPattern(); + } + + bool ok(){ + return c->ok(); + } + + bool advance(){ + return c->advance(); + } + + bool currentMatches(){ + if ( !
c->matcher() ) + return true; + return c->matcher()->matchesCurrent( c.get() ); + } + + BSONObj current(){ + return c->current(); + } + private: void setLastLoc_inlock(DiskLoc); @@ -180,8 +299,6 @@ */ void updateLocation(); - void cleanupByLocation(DiskLoc loc); - void mayUpgradeStorage() { /* if ( !ids_.get() ) return; @@ -193,10 +310,10 @@ /** * @param millis amount of idle passed time since last call */ - bool shouldTimeout( unsigned millis ){ - _idleAgeMillis += millis; - return _idleAgeMillis > 600000 && _pinValue == 0; - } + bool shouldTimeout( unsigned millis ); + + void storeOpForSlave( DiskLoc last ); + void updateSlaveLocation( CurOp& curop ); unsigned idleTime(){ return _idleAgeMillis; @@ -209,16 +326,31 @@ void noTimeout() { _pinValue++; } + + multimap<DiskLoc, ClientCursor*>& byLoc() { + return _db->ccByLoc; + } public: void setDoingDeletes( bool doingDeletes ){ _doingDeletes = doingDeletes; } + + static void appendStats( BSONObjBuilder& result ); - static unsigned byLocSize(); // just for diagnostics + static unsigned numCursors() { return clientCursorsById.size(); } static void informAboutToDeleteBucket(const DiskLoc& b); static void aboutToDelete(const DiskLoc& dl); + + static void find( const string& ns , set<CursorId>& all ); + }; + + class ClientCursorMonitor : public BackgroundJob { + public: + void run(); + string name() { return "ClientCursorMonitor"; } }; + extern ClientCursorMonitor clientCursorMonitor; } // namespace mongo diff -Nru mongodb-1.4.4/db/client.h mongodb-1.6.3/db/client.h --- mongodb-1.4.4/db/client.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/client.h 2010-09-24 10:02:42.000000000 -0700 @@ -24,7 +24,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "security.h" #include "namespace.h" #include "lasterror.h" @@ -32,21 +32,30 @@ namespace mongo { + extern class ReplSet *theReplSet; class AuthenticationInfo; class Database; class CurOp; class Command; class Client; + class MessagingPort; extern boost::thread_specific_ptr<Client> currentClient; class Client : boost::noncopyable { public: + static Client *syncThread; + void iAmSyncThread() { + wassert( syncThread == 0 ); + syncThread = this; + } + bool isSyncThread() const { return this == syncThread; } // true if this client is the replication secondary pull thread + static mongo::mutex clientsMutex; static set<Client*> clients; // always be in clientsMutex when manipulating this + static int recommendedYieldMicros( int * writers = 0 , int * readers = 0 ); - static int recommendedYieldMicros(); - + /* set _god=true temporarily, safely */ class GodScope { bool _prev; public: @@ -80,16 +89,16 @@ public: Context(const string& ns, string path=dbpath, mongolock * lock = 0 , bool doauth=true ) : _client( currentClient.get() ) , _oldContext( _client->_context ) , - _path( path ) , _lock( lock ) , - _ns( ns ){ + _path( path ) , _lock( lock ) , + _ns( ns ), _db(0){ _finishInit( doauth ); } /* this version saves the context but doesn't yet set the new one: */ - + Context() : _client( currentClient.get() ) , _oldContext( _client->_context ), - _path( dbpath ) , _lock(0) , _justCreated(false){ + _path( dbpath ) , _lock(0) , _justCreated(false), _db(0){ _client->_context = this; clear(); } @@ -101,20 +110,11 @@ Context( string ns , Database * db, bool doauth=true ); ~Context(); - - Client* getClient() const { return _client; } - - Database* db() const { - return _db; - } - const char * ns() const { - return _ns.c_str(); - } - - bool justCreated() const { - return _justCreated; - } + Client* getClient() const { return _client; } +
Database* db() const { return _db; } + const char * ns() const { return _ns.c_str(); } + bool justCreated() const { return _justCreated; } bool equals( const string& ns , const string& path=dbpath ) const { return _ns == ns && _path == path; @@ -154,65 +154,97 @@ } friend class CurOp; - }; + }; // class Client::Context private: + void _dropns( const string& ns ); + CurOp * _curOp; Context * _context; bool _shutdown; - list<string> _tempCollections; + set<string> _tempCollections; const char *_desc; bool _god; AuthenticationInfo _ai; + ReplTime _lastOp; + BSONObj _handshake; + BSONObj _remoteId; public: - + MessagingPort * const _mp; + + string clientAddress() const; AuthenticationInfo * getAuthenticationInfo(){ return &_ai; } bool isAdmin() { return _ai.isAuthorized( "admin" ); } - - CurOp* curop() { return _curOp; } - + CurOp* curop() { return _curOp; } Context* getContext(){ return _context; } Database* database() { return _context ? _context->db() : 0; } - const char *ns() { return _context->ns(); } + const char *ns() const { return _context->ns(); } + const char *desc() const { return _desc; } - Client(const char *desc); + Client(const char *desc, MessagingPort *p = 0); ~Client(); - const char *desc() const { return _desc; } - - void addTempCollection( const string& ns ){ - _tempCollections.push_back( ns ); + void addTempCollection( const string& ns ); + + void _invalidateDB(const string& db); + static void invalidateDB(const string& db); + static void invalidateNS( const string& ns ); + + void setLastOp( ReplTime op ) { _lastOp = op; } + ReplTime getLastOp() const { return _lastOp; } + + /* report what the last operation was. used by getlasterror */ + void appendLastOp( BSONObjBuilder& b ) { + if( theReplSet ) { + b.append("lastOp" , (long long) _lastOp); + } + else { + OpTime lo(_lastOp); + if ( ! lo.isNull() ) + b.appendTimestamp( "lastOp" , lo.asDate() ); + } } /* each thread which does db operations has a Client object in TLS. call this when your thread starts. */ - static void initThread(const char *desc); + static Client& initThread(const char *desc, MessagingPort *mp = 0); /* this has to be called as the client goes away, but before thread termination @return true if anything was done */ bool shutdown(); - + + /* this is for map/reduce writes */ bool isGod() const { return _god; } friend class CurOp; string toString() const; + void gotHandshake( const BSONObj& o ); + BSONObj getRemoteID() const { return _remoteId; } + BSONObj getHandshake() const { return _handshake; } }; + /** get the Client object for this thread. */ inline Client& cc() { - return *currentClient.get(); + Client * c = currentClient.get(); + assert( c ); + return *c; } /* each thread which does db operations has a Client object in TLS. call this when your thread starts.
*/ - inline void Client::initThread(const char *desc) { + inline Client& Client::initThread(const char *desc, MessagingPort *mp) { + setThreadName(desc); assert( currentClient.get() == 0 ); - currentClient.reset( new Client(desc) ); + Client *c = new Client(desc, mp); + currentClient.reset(c); + mongo::lastError.initThread(); + return *c; } inline Client::GodScope::GodScope(){ @@ -247,8 +279,5 @@ string sayClientState(); - inline bool haveClient(){ - return currentClient.get() > 0; - } + inline bool haveClient() { return currentClient.get() > 0; } }; - diff -Nru mongodb-1.4.4/db/cloner.cpp mongodb-1.6.3/db/cloner.cpp --- mongodb-1.4.4/db/cloner.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/cloner.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,10 +16,10 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" #include "../client/dbclient.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include "jsobj.h" #include "query.h" #include "commands.h" @@ -37,10 +37,10 @@ auto_ptr< DBClientWithCommands > conn; void copy(const char *from_ns, const char *to_ns, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, Query q = Query()); - void replayOpLog( DBClientCursor *c, const BSONObj &query ); + struct Fun; public: Cloner() { } - + /* slaveOk - if true it is ok if the source of the data is !ismaster. useReplAuth - use the credentials we normally use as a replication slave for the cloning snapshot - use $snapshot mode for copying collections. note this should not be used when it isn't required, as it will be slower. @@ -48,8 +48,8 @@ */ void setConnection( DBClientWithCommands *c ) { conn.reset( c ); } bool go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot); - bool startCloneCollection( const char *fromhost, const char *ns, const BSONObj &query, string& errmsg, bool logForRepl, bool copyIndexes, int logSizeMb, long long &cursorId ); - bool finishCloneCollection( const char *fromhost, const char *ns, const BSONObj &query, long long cursorId, string &errmsg ); + + bool copyCollection( const string& from , const string& ns , const BSONObj& query , string& errmsg , bool copyIndexes = true, bool logForRepl = true ); }; /* for index info object: @@ -86,75 +86,108 @@ return res; } + struct Cloner::Fun { + void operator()( DBClientCursorBatchIterator &i ) { + mongolock l( true ); + if ( context ) { + context->relocked(); + } + + while( i.moreInCurrentBatch() ) { + if ( n % 128 == 127 /*yield some*/ ) { + dbtemprelease t; + } + + BSONObj tmp = i.nextSafe(); + + /* assure object is valid. note this will slow us down a little. */ + if ( !tmp.valid() ) { + stringstream ss; + ss << "Cloner: skipping corrupt object from " << from_collection; + BSONElement e = tmp.firstElement(); + try { + e.validate(); + ss << " firstElement: " << e; + } + catch( ... 
){ + ss << " firstElement corrupt"; + } + out() << ss.str() << endl; + continue; + } + + ++n; + + BSONObj js = tmp; + if ( isindex ) { + assert( strstr(from_collection, "system.indexes") ); + js = fixindex(tmp); + storedForLater->push_back( js.getOwned() ); + continue; + } + + try { + theDataFileMgr.insertWithObjMod(to_collection, js); + if ( logForRepl ) + logOp("i", to_collection, js); + } + catch( UserException& e ) { + log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; + } + + RARELY if ( time( 0 ) - saveLast > 60 ) { + log() << n << " objects cloned so far from collection " << from_collection << endl; + saveLast = time( 0 ); + } + } + } + int n; + bool isindex; + const char *from_collection; + const char *to_collection; + time_t saveLast; + list *storedForLater; + bool logForRepl; + Client::Context *context; + }; + /* copy the specified collection isindex - if true, this is system.indexes collection, in which we do some transformation when copying. */ void Cloner::copy(const char *from_collection, const char *to_collection, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, Query query) { - auto_ptr c; - { - dbtemprelease r; - c = conn->query( from_collection, query, 0, 0, 0, QueryOption_NoCursorTimeout | ( slaveOk ? QueryOption_SlaveOk : 0 ) ); - } - list storedForLater; - massert( 13055 , "socket error in Cloner:copy" , c.get() ); - long long n = 0; - time_t saveLast = time( 0 ); - while ( 1 ) { - if( !c->moreInCurrentBatch() || n % 128 == 127 /*yield some*/ ) { - dbtemprelease r; - if ( !c->more() ) - break; - } - BSONObj tmp = c->next(); - - /* assure object is valid. note this will slow us down a little. */ - if ( !tmp.valid() ) { - stringstream ss; - ss << "Cloner: skipping corrupt object from " << from_collection; - BSONElement e = tmp.firstElement(); - try { - e.validate(); - ss << " firstElement: " << e; - } - catch( ... ){ - ss << " firstElement corrupt"; + Fun f; + f.n = 0; + f.isindex = isindex; + f.from_collection = from_collection; + f.to_collection = to_collection; + f.saveLast = time( 0 ); + f.storedForLater = &storedForLater; + f.logForRepl = logForRepl; + + int options = QueryOption_NoCursorTimeout | ( slaveOk ? 
QueryOption_SlaveOk : 0 ); + { + dbtemprelease r; + f.context = r._context; + DBClientConnection *remote = dynamic_cast< DBClientConnection* >( conn.get() ); + if ( remote ) { + remote->query( boost::function( f ), from_collection, query, 0, options ); + } else { // no exhaust mode for direct client, so we have this hack + auto_ptr c = conn->query( from_collection, query, 0, 0, 0, options ); + assert( c.get() ); + while( c->more() ) { + DBClientCursorBatchIterator i( *c ); + f( i ); } - out() << ss.str() << endl; - continue; - } - - ++n; - - BSONObj js = tmp; - if ( isindex ) { - assert( strstr(from_collection, "system.indexes") ); - js = fixindex(tmp); - storedForLater.push_back( js.getOwned() ); - continue; - } - - try { - theDataFileMgr.insert(to_collection, js); - if ( logForRepl ) - logOp("i", to_collection, js); - } - catch( UserException& e ) { - log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; - } - - RARELY if ( time( 0 ) - saveLast > 60 ) { - log() << n << " objects cloned so far from collection " << from_collection << endl; - saveLast = time( 0 ); } } - + if ( storedForLater.size() ){ for ( list::iterator i = storedForLater.begin(); i!=storedForLater.end(); i++ ){ BSONObj js = *i; try { - theDataFileMgr.insert(to_collection, js); + theDataFileMgr.insertWithObjMod(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); } @@ -164,7 +197,50 @@ } } } + + bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logForRepl) { + Cloner c; + return c.copyCollection(host, ns, query, errmsg , /*copyIndexes*/ true, logForRepl); + } + + bool Cloner::copyCollection( const string& from , const string& ns , const BSONObj& query , string& errmsg , bool copyIndexes, bool logForRepl ) { + auto_ptr myconn; + myconn.reset( new DBClientConnection() ); + if ( ! myconn->connect( from , errmsg ) ) + return false; + + conn.reset( myconn.release() ); + + writelock lk(ns); // TODO: make this lower down + Client::Context ctx(ns); + + { // config + string temp = ctx.db()->name + ".system.namespaces"; + BSONObj config = conn->findOne( temp , BSON( "name" << ns ) ); + if ( config["options"].isABSONObj() ) + if ( ! userCreateNS( ns.c_str() , config["options"].Obj() , errmsg, true , 0 ) ) + return false; + } + + { // main data + copy( ns.c_str() , ns.c_str() , /*isindex*/false , logForRepl , false , true , Query(query).snapshot() ); + } + + /* TODO : copyIndexes bool does not seem to be implemented! */ + if( !copyIndexes ) { + log() << "ERROR copy collection copyIndexes not implemented? " << ns << endl; + } + + { // indexes + string temp = ctx.db()->name + ".system.indexes"; + copy( temp.c_str() , temp.c_str() , /*isindex*/true , logForRepl , false , true , BSON( "ns" << ns ) ); + } + return true; + } + extern bool inDBRepair; + void ensureIdIndexForNewNs(const char *ns); + bool Cloner::go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot) { massert( 10289 , "useReplAuth is not written to replication log", !useReplAuth || !logForRepl ); @@ -190,6 +266,7 @@ { dbtemprelease r; + // just using exhaust for collection copying right now auto_ptr c; { if ( conn.get() ) { @@ -228,14 +305,14 @@ const char *from_name = e.valuestr(); if( strstr(from_name, ".system.") ) { - /* system.users is cloned -- but nothing else from system. */ + /* system.users and s.js is cloned -- but nothing else from system. 
+ * system.indexes is handled specially at the end*/ if( legalClientSystemNS( from_name , true ) == 0 ){ log(2) << "\t\t not cloning because system collection" << endl; continue; } } - if( strchr(from_name, '$') ) { - // don't clone index namespaces -- we take care of those separately below. + if( ! nsDollarCheck( from_name ) ){ log(2) << "\t\t not cloning because has $ " << endl; continue; } @@ -257,19 +334,38 @@ assert(p); string to_name = todb + p; + bool wantIdIndex = false; { string err; const char *toname = to_name.c_str(); - userCreateNS(toname, options, err, logForRepl); + /* we defer building id index for performance - building it in batch is much faster */ + userCreateNS(toname, options, err, logForRepl, &wantIdIndex); } log(1) << "\t\t cloning " << from_name << " -> " << to_name << endl; Query q; if( snapshot ) q.snapshot(); copy(from_name, to_name.c_str(), false, logForRepl, masterSameProcess, slaveOk, q); + + if( wantIdIndex ) { + /* we need dropDups to be true as we didn't do a true snapshot and this is before applying oplog operations + that occur during the initial sync. inDBRepair makes dropDups be true. + */ + bool old = inDBRepair; + try { + inDBRepair = true; + ensureIdIndexForNewNs(to_name.c_str()); + inDBRepair = old; + } + catch(...) { + inDBRepair = old; + throw; + } + } } // now build the indexes + string system_indexes_from = fromdb + ".system.indexes"; string system_indexes_to = todb + ".system.indexes"; /* [dm]: is the ID index sometimes not called "_id_"? There is other code in the system that looks for a "_id" prefix @@ -280,121 +376,6 @@ return true; } - - bool Cloner::startCloneCollection( const char *fromhost, const char *ns, const BSONObj &query, string &errmsg, bool logForRepl, bool copyIndexes, int logSizeMb, long long &cursorId ) { - char db[256]; - nsToDatabase( ns, db ); - - NamespaceDetails *nsd = nsdetails( ns ); - if ( nsd ){ - /** note: its ok to clone into a collection, but only if the range you're copying - doesn't exist on this server */ - string err; - if ( runCount( ns , BSON( "query" << query ) , err ) > 0 ){ - log() << "WARNING: data already exists for: " << ns << " in range : " << query << " deleting..." << endl; - deleteObjects( ns , query , false , logForRepl , false ); - } - } - - { - dbtemprelease r; - auto_ptr< DBClientConnection > c( new DBClientConnection() ); - if ( !c->connect( fromhost, errmsg ) ) - return false; - if( !replAuthenticate(c.get()) ) - return false; - conn = c; - - // Start temporary op log - BSONObjBuilder cmdSpec; - cmdSpec << "logCollection" << ns << "start" << 1; - if ( logSizeMb != INT_MIN ) - cmdSpec << "logSizeMb" << logSizeMb; - BSONObj info; - if ( !conn->runCommand( db, cmdSpec.done(), info ) ) { - errmsg = "logCollection failed: " + (string)info; - return false; - } - } - - if ( ! nsd ) { - BSONObj spec = conn->findOne( string( db ) + ".system.namespaces", BSON( "name" << ns ) ); - if ( !userCreateNS( ns, spec.getObjectField( "options" ), errmsg, true ) ) - return false; - } - - copy( ns, ns, false, logForRepl, false, false, query ); - - if ( copyIndexes ) { - string indexNs = string( db ) + ".system.indexes"; - copy( indexNs.c_str(), indexNs.c_str(), true, logForRepl, false, false, BSON( "ns" << ns << "name" << NE << "_id_" ) ); - } - - auto_ptr< DBClientCursor > c; - { - dbtemprelease r; - string logNS = "local.temp.oplog." 
+ string( ns ); - c = conn->query( logNS.c_str(), Query(), 0, 0, 0, QueryOption_CursorTailable ); - } - if ( c->more() ) { - replayOpLog( c.get(), query ); - cursorId = c->getCursorId(); - massert( 10291 , "Expected valid tailing cursor", cursorId != 0 ); - } else { - massert( 10292 , "Did not expect valid cursor for empty query result", c->getCursorId() == 0 ); - cursorId = 0; - } - c->decouple(); - return true; - } - - void Cloner::replayOpLog( DBClientCursor *c, const BSONObj &query ) { - Matcher matcher( query ); - while( 1 ) { - BSONObj op; - { - dbtemprelease t; - if ( !c->more() ) - break; - op = c->next(); - } - // For sharding v1.0, we don't allow shard key updates -- so just - // filter each insert by value. - if ( op.getStringField( "op" )[ 0 ] != 'i' || matcher.matches( op.getObjectField( "o" ) ) ) - ReplSource::applyOperation( op ); - } - } - - bool Cloner::finishCloneCollection( const char *fromhost, const char *ns, const BSONObj &query, long long cursorId, string &errmsg ) { - char db[256]; - nsToDatabase( ns, db ); - - auto_ptr< DBClientCursor > cur; - { - dbtemprelease r; - auto_ptr< DBClientConnection > c( new DBClientConnection() ); - if ( !c->connect( fromhost, errmsg ) ) - return false; - if( !replAuthenticate(c.get()) ) - return false; - conn = c; - string logNS = "local.temp.oplog." + string( ns ); - if ( cursorId != 0 ) - cur = conn->getMore( logNS.c_str(), cursorId ); - else - cur = conn->query( logNS.c_str(), Query() ); - } - replayOpLog( cur.get(), query ); - { - dbtemprelease t; - BSONObj info; - if ( !conn->runCommand( db, BSON( "logCollection" << ns << "validateComplete" << 1 ), info ) ) { - errmsg = "logCollection failed: " + (string)info; - return false; - } - } - return true; - } /* slaveOk - if true it is ok if the source of the data is !ismaster. useReplAuth - use the credentials we normally use as a replication slave for the cloning @@ -413,169 +394,74 @@ */ class CmdClone : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { help << "clone this database from an instance of the db on another host\n"; - help << "example: { clone : \"host13\" }"; + help << "{ clone : \"host13\" }"; } CmdClone() : Command("clone") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string from = cmdObj.getStringField("clone"); if ( from.empty() ) return false; /* replication note: we must logOp() not the command, but the cloned data -- if the slave were to clone it would get a different point-in-time and not match. 
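       e.g. (illustrative, taken from Cloner::copy above) each copied document is
       logged as its own insert rather than logging the clone command itself:
           theDataFileMgr.insertWithObjMod(to_collection, js);
           if ( logForRepl )
               logOp("i", to_collection, js);   // log the data, not the command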
*/ - return cloneFrom(from.c_str(), errmsg, cc().database()->name, + return cloneFrom(from.c_str(), errmsg, dbname, /*logForReplication=*/!fromRepl, /*slaveok*/false, /*usereplauth*/false, /*snapshot*/true); } } cmdclone; class CmdCloneCollection : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return NONE; } CmdCloneCollection() : Command("cloneCollection") { } virtual void help( stringstream &help ) const { - help << " example: { cloneCollection: <collection ns>, from: <hostname>, query: <query> }"; + help << "{ cloneCollection: <namespace>, from: <host> [,query: <query_filter>] [,copyIndexes:<bool>] }" + "\nCopies a collection from one server to another. Do not use on a single server as the destination " + "is placed at the same db.collection (namespace) as the source.\n" + "Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there." + ; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("from"); if ( fromhost.empty() ) { - errmsg = "missing from spec"; + errmsg = "missing 'from' parameter"; return false; } + { + HostAndPort h(fromhost); + if( h.isSelf() ) { + errmsg = "can't cloneCollection from self"; + return false; + } + } string collection = cmdObj.getStringField("cloneCollection"); if ( collection.empty() ) { - errmsg = "missing cloneCollection spec"; + errmsg = "bad 'cloneCollection' value"; return false; } BSONObj query = cmdObj.getObjectField("query"); if ( query.isEmpty() ) query = BSONObj(); + BSONElement copyIndexesSpec = cmdObj.getField("copyindexes"); bool copyIndexes = copyIndexesSpec.isBoolean() ? copyIndexesSpec.boolean() : true; - // Will not be used if doesn't exist. - int logSizeMb = cmdObj.getIntField( "logSizeMb" ); - /* replication note: we must logOp() not the command, but the cloned data -- if the slave - were to clone it would get a different point-in-time and not match. - */ - Client::Context ctx( collection ); - - log() << "cloneCollection. db:" << ns << " collection:" << collection << " from: " << fromhost << " query: " << query << " logSizeMb: " << logSizeMb << ( copyIndexes ? "" : ", not copying indexes" ) << endl; + log() << "cloneCollection. db:" << dbname << " collection:" << collection << " from: " << fromhost + << " query: " << query << " " << ( copyIndexes ? 
"" : ", not copying indexes" ) << endl; Cloner c; - long long cursorId; - if ( !c.startCloneCollection( fromhost.c_str(), collection.c_str(), query, errmsg, !fromRepl, copyIndexes, logSizeMb, cursorId ) ) - return false; - return c.finishCloneCollection( fromhost.c_str(), collection.c_str(), query, cursorId, errmsg); + return c.copyCollection( fromhost , collection , query, errmsg , copyIndexes ); } } cmdclonecollection; - class CmdStartCloneCollection : public Command { - public: - virtual bool slaveOk() { - return false; - } - virtual LockType locktype(){ return WRITE; } - CmdStartCloneCollection() : Command("startCloneCollection") { } - virtual void help( stringstream &help ) const { - help << " example: { startCloneCollection: , from: , query: }"; - help << ", returned object includes a finishToken field, the value of which may be passed to the finishCloneCollection command"; - } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - string fromhost = cmdObj.getStringField("from"); - if ( fromhost.empty() ) { - errmsg = "missing from spec"; - return false; - } - string collection = cmdObj.getStringField("startCloneCollection"); - if ( collection.empty() ) { - errmsg = "missing startCloneCollection spec"; - return false; - } - BSONObj query = cmdObj.getObjectField("query"); - if ( query.isEmpty() ) - query = BSONObj(); - BSONElement copyIndexesSpec = cmdObj.getField("copyindexes"); - bool copyIndexes = copyIndexesSpec.isBoolean() ? copyIndexesSpec.boolean() : true; - // Will not be used if doesn't exist. - int logSizeMb = cmdObj.getIntField( "logSizeMb" ); - - /* replication note: we must logOp() not the command, but the cloned data -- if the slave - were to clone it would get a different point-in-time and not match. - */ - Client::Context ctx(collection); - - log() << "startCloneCollection. 
db:" << ns << " collection:" << collection << " from: " << fromhost << " query: " << query << endl; - - Cloner c; - long long cursorId; - bool res = c.startCloneCollection( fromhost.c_str(), collection.c_str(), query, errmsg, !fromRepl, copyIndexes, logSizeMb, cursorId ); - - if ( res ) { - BSONObjBuilder b; - b << "fromhost" << fromhost; - b << "collection" << collection; - b << "query" << query; - b.appendDate( "cursorId", cursorId ); - BSONObj token = b.done(); - result << "finishToken" << token; - } - return res; - } - } cmdstartclonecollection; - - class CmdFinishCloneCollection : public Command { - public: - virtual bool slaveOk() { - return false; - } - virtual LockType locktype(){ return WRITE; } - CmdFinishCloneCollection() : Command("finishCloneCollection") { } - virtual void help( stringstream &help ) const { - help << " example: { finishCloneCollection: }"; - } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - BSONObj fromToken = cmdObj.getObjectField("finishCloneCollection"); - if ( fromToken.isEmpty() ) { - errmsg = "missing finishCloneCollection finishToken spec"; - return false; - } - string fromhost = fromToken.getStringField( "fromhost" ); - if ( fromhost.empty() ) { - errmsg = "missing fromhost spec"; - return false; - } - string collection = fromToken.getStringField("collection"); - if ( collection.empty() ) { - errmsg = "missing collection spec"; - return false; - } - BSONObj query = fromToken.getObjectField("query"); - if ( query.isEmpty() ) { - query = BSONObj(); - } - long long cursorId = 0; - BSONElement cursorIdToken = fromToken.getField( "cursorId" ); - if ( cursorIdToken.type() == Date ) { - cursorId = cursorIdToken._numberLong(); - } - - Client::Context ctx( collection ); - - log() << "finishCloneCollection. 
db:" << ns << " collection:" << collection << " from: " << fromhost << " query: " << query << endl; - - Cloner c; - return c.finishCloneCollection( fromhost.c_str(), collection.c_str(), query, cursorId, errmsg ); - } - } cmdfinishclonecollection; thread_specific_ptr< DBClientConnection > authConn_; /* Usage: @@ -584,18 +470,18 @@ class CmdCopyDbGetNonce : public Command { public: CmdCopyDbGetNonce() : Command("copydbgetnonce") { } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { help << "get a nonce for subsequent copy db request from secure server\n"; help << "usage: {copydbgetnonce: 1, fromhost: }"; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("fromhost"); if ( fromhost.empty() ) { /* copy from self */ @@ -610,7 +496,7 @@ if ( !authConn_->connect( fromhost, errmsg ) ) return false; if( !authConn_->runCommand( "admin", BSON( "getnonce" << 1 ), ret ) ) { - errmsg = "couldn't get nonce " + string( ret ); + errmsg = "couldn't get nonce " + ret.toString(); return false; } } @@ -625,18 +511,18 @@ class CmdCopyDb : public Command { public: CmdCopyDb() : Command("copydb") { } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { help << "copy a database from another host to this host\n"; help << "usage: {copydb: 1, fromhost: , fromdb: , todb: [, username: , nonce: , key: ]}"; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("fromhost"); if ( fromhost.empty() ) { /* copy from self */ @@ -660,7 +546,7 @@ { dbtemprelease t; if ( !authConn_->runCommand( fromdb, BSON( "authenticate" << 1 << "user" << username << "nonce" << nonce << "key" << key ), ret ) ) { - errmsg = "unable to login " + string( ret ); + errmsg = "unable to login " + ret.toString(); return false; } } @@ -675,20 +561,20 @@ class CmdRenameCollection : public Command { public: CmdRenameCollection() : Command( "renameCollection" ) {} - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual bool logTheOp() { return true; // can't log steps when doing fast rename within a db, so always log the op rather than individual steps comprising it. 
} virtual void help( stringstream &help ) const { help << " example: { renameCollection: foo.a, to: bar.b }"; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); if ( source.empty() || target.empty() ) { @@ -750,7 +636,7 @@ break; } BSONObj o = c->next(); - theDataFileMgr.insert( target.c_str(), o ); + theDataFileMgr.insertWithObjMod( target.c_str(), o ); } char cl[256]; @@ -780,7 +666,7 @@ } } BSONObj n = b.done(); - theDataFileMgr.insert( targetIndexes.c_str(), n ); + theDataFileMgr.insertWithObjMod( targetIndexes.c_str(), n ); } { diff -Nru mongodb-1.4.4/db/cmdline.cpp mongodb-1.6.3/db/cmdline.cpp --- mongodb-1.4.4/db/cmdline.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/cmdline.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,10 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "cmdline.h" #include "commands.h" +#include "../util/processinfo.h" namespace po = boost::program_options; @@ -41,8 +42,10 @@ ("verbose,v", "be more verbose (include multiple times for more verbosity e.g. -vvvvv)") ("quiet", "quieter output") ("port", po::value<int>(&cmdLine.port), "specify port number") + ("bind_ip", po::value<string>(&cmdLine.bind_ip), "comma separated list of ip addresses to listen on - all local ips by default") ("logpath", po::value<string>() , "file to send all output to instead of stdout" ) ("logappend" , "append to logpath instead of over-writing" ) + ("pidfilepath", po::value<string>(), "full path to pidfile (if not set, no pidfile is created)") #ifndef _WIN32 ("fork" , "fork server process" ) #endif @@ -114,25 +117,83 @@ cmdLine.quiet = true; } + string logpath; + #ifndef _WIN32 if (params.count("fork")) { if ( ! params.count( "logpath" ) ){ cout << "--fork has to be used with --logpath" << endl; ::exit(-1); } + + { // test logpath + logpath = params["logpath"].as<string>(); + assert( logpath.size() ); + if ( logpath[0] != '/' ){ + char temp[256]; + assert( getcwd( temp , 256 ) ); + logpath = (string)temp + "/" + logpath; + } + FILE * test = fopen( logpath.c_str() , "a" ); + if ( ! 
test ){ + cout << "can't open [" << logpath << "] for log file: " << errnoWithDescription() << endl; + ::exit(-1); + } + fclose( test ); + } + + cout.flush(); + cerr.flush(); + pid_t c = fork(); if ( c ){ - cout << "forked process: " << c << endl; - ::exit(0); + _exit(0); + } + + if ( chdir("/") < 0 ){ + cout << "Can't chdir() while forking server process: " << strerror(errno) << endl; + ::exit(-1); } setsid(); + + pid_t c2 = fork(); + if ( c2 ){ + cout << "forked process: " << c2 << endl; + _exit(0); + } + + // stdout handled in initLogging + //fclose(stdout); + //freopen("/dev/null", "w", stdout); + + fclose(stderr); + fclose(stdin); + + FILE* f = freopen("/dev/null", "w", stderr); + if ( f == NULL ){ + cout << "Can't reassign stderr while forking server process: " << strerror(errno) << endl; + ::exit(-1); + } + + f = freopen("/dev/null", "r", stdin); + if ( f == NULL ){ + cout << "Can't reassign stdin while forking server process: " << strerror(errno) << endl; + ::exit(-1); + } + + setupCoreSignals(); setupSignals(); } #endif if (params.count("logpath")) { - string lp = params["logpath"].as<string>(); - uassert( 10033 , "logpath has to be non-zero" , lp.size() ); - initLogging( lp , params.count( "logappend" ) ); + if ( logpath.size() == 0 ) + logpath = params["logpath"].as<string>(); + uassert( 10033 , "logpath has to be non-zero" , logpath.size() ); + initLogging( logpath , params.count( "logappend" ) ); + } + + if ( params.count("pidfilepath")) { + writePidFile( params["pidfilepath"].as<string>() ); } { @@ -144,15 +205,26 @@ return true; } + + void ignoreSignal( int signal ){ + } + + void setupCoreSignals(){ +#if !defined(_WIN32) + assert( signal(SIGUSR1 , rotateLogs ) != SIG_ERR ); + assert( signal(SIGHUP , ignoreSignal ) != SIG_ERR ); +#endif + } class CmdGetCmdLineOpts : Command{ public: CmdGetCmdLineOpts(): Command("getCmdLineOpts") {} - virtual LockType locktype() { return NONE; } - virtual bool adminOnly() { return true; } - virtual bool slaveOk() { return true; } + void help(stringstream& h) const { h << "get argv"; } + virtual LockType locktype() const { return NONE; } + virtual bool adminOnly() const { return true; } + virtual bool slaveOk() const { return true; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ result.append("argv", argvArray); return true; } diff -Nru mongodb-1.4.4/db/cmdline.h mongodb-1.6.3/db/cmdline.h --- mongodb-1.4.4/db/cmdline.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/cmdline.h 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" namespace mongo { @@ -25,8 +25,18 @@ /* concurrency: OK/READ */ struct CmdLine { int port; // --port + string bind_ip; // --bind_ip bool rest; // --rest + string _replSet; // --replSet[/<seedlist>] + string ourSetName() const { + string setname; + size_t sl = _replSet.find('/'); + if( sl == string::npos ) + return _replSet; + return _replSet.substr(0, sl); + } + string source; // --source string only; // --only @@ -43,6 +53,9 @@ int defaultProfile; // --profile int slowMS; // --time in ms that is "slow" + int pretouch; // --pretouch for replication application (experimental) + bool moveParanoia; // for move chunk paranoia + enum { DefaultDBPort = 27017, ConfigServerPort = 27019, @@ -51,7 +64,7 @@ CmdLine() : port(DefaultDBPort), rest(false), quiet(false), notablescan(false), prealloc(true), smallfiles(false), - 
quota(false), quotaFiles(8), cpu(false), oplogSize(0), defaultProfile(0), slowMS(100) + quota(false), quotaFiles(8), cpu(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ) { } @@ -71,4 +84,5 @@ extern CmdLine cmdLine; + void setupCoreSignals(); } diff -Nru mongodb-1.4.4/db/commands.cpp mongodb-1.6.3/db/commands.cpp --- mongodb-1.4.4/db/commands.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/commands.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,21 +17,104 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "jsobj.h" #include "commands.h" #include "client.h" -#include "replset.h" +#include "replpair.h" namespace mongo { + map * Command::_commandsByBestName; + map * Command::_webCommands; map * Command::_commands; - Command::Command(const char *_name) : name(_name) { + void Command::htmlHelp(stringstream& ss) const { + string helpStr; + { + stringstream h; + help(h); + helpStr = h.str(); + } + ss << "\n"; + bool web = _webCommands->count(name) != 0; + if( web ) ss << ""; + ss << name; + if( web ) ss << ""; + ss << "\n"; + ss << ""; + int l = locktype(); + //if( l == NONE ) ss << "N "; + if( l == READ ) ss << "R "; + else if( l == WRITE ) ss << "W "; + if( slaveOk() ) + ss << "S "; + if( adminOnly() ) + ss << "A"; + ss << ""; + ss << ""; + if( helpStr != "no help defined" ) { + const char *p = helpStr.c_str(); + while( *p ) { + if( *p == '<' ) { + ss << "<"; + p++; continue; + } + else if( *p == '{' ) + ss << ""; + else if( *p == '}' ) { + ss << "}"; + p++; + continue; + } + if( strncmp(p, "http:", 5) == 0 ) { + ss << ""; + q = p; + if( startsWith(q, "http://www.mongodb.org/display/") ) + q += 31; + while( *q && *q != ' ' && *q != '\n' ) { + ss << (*q == '+' ? ' ' : *q); + q++; + if( *q == '#' ) + while( *q && *q != ' ' && *q != '\n' ) q++; + } + ss << ""; + p = q; + continue; + } + if( *p == '\n' ) ss << "
"; + else ss << *p; + p++; + } + } + ss << ""; + ss << "\n"; + } + + Command::Command(const char *_name, bool web, const char *oldName) : name(_name) { // register ourself. if ( _commands == 0 ) _commands = new map; - (*_commands)[name] = this; + if( _commandsByBestName == 0 ) + _commandsByBestName = new map; + Command*& c = (*_commands)[name]; + if ( c ) + log() << "warning: 2 commands with name: " << _name << endl; + c = this; + (*_commandsByBestName)[name] = this; + + if( web ) { + if( _webCommands == 0 ) + _webCommands = new map; + (*_webCommands)[name] = this; + } + + if( oldName ) + (*_commands)[oldName] = this; } void Command::help( stringstream& help ) const { @@ -46,9 +129,7 @@ bool ok = false; bool valid = false; - BSONElement e; - e = jsobj.firstElement(); - + BSONElement e = jsobj.firstElement(); map::iterator i; if ( e.eoo() ) @@ -60,9 +141,9 @@ valid = true; string errmsg; Command *c = i->second; - if ( c->adminOnly() && strncmp(ns, "admin", 5) != 0 ) { + if ( c->adminOnly() && !startsWith(ns, "admin.") ) { ok = false; - errmsg = "access denied"; + errmsg = "access denied - use admin db"; } else if ( jsobj.getBoolField( "help" ) ){ stringstream help; @@ -71,7 +152,7 @@ anObjBuilder.append( "help" , help.str() ); } else { - ok = c->run(ns, jsobj, errmsg, anObjBuilder, false); + ok = c->run( nsToDatabase( ns ) , jsobj, errmsg, anObjBuilder, false); } BSONObj tmp = anObjBuilder.asTempObj(); @@ -106,5 +187,12 @@ return c->locktype(); } + void Command::logIfSlow( const Timer& timer, const string& msg ) { + int ms = timer.millis(); + if ( ms > cmdLine.slowMS ){ + out() << msg << " took " << ms << " ms." << endl; + } + } + } // namespace mongo diff -Nru mongodb-1.4.4/db/commands.h mongodb-1.6.3/db/commands.h --- mongodb-1.4.4/db/commands.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/commands.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" namespace mongo { @@ -27,14 +27,15 @@ class BufBuilder; class Client; -// db "commands" (sent via db.$cmd.findOne(...)) -// subclass to make a command. + /** mongodb "commands" (sent via db.$cmd.findOne(...)) + subclass to make a command. define a singleton object for it. + */ class Command { public: enum LockType { READ = -1 , NONE = 0 , WRITE = 1 }; - string name; + const string name; /* run the given command implement this... @@ -44,20 +45,22 @@ return value is true if succeeded. if false, set errmsg text. */ - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0; + virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0; /* note: logTheTop() MUST be false if READ if NONE, can't use Client::Context setup use with caution */ - virtual LockType locktype() = 0; + virtual LockType locktype() const = 0; /* Return true if only the admin ns has privileges to run this command. */ - virtual bool adminOnly() { + virtual bool adminOnly() const { return false; } + void htmlHelp(stringstream&) const; + /* Like adminOnly, but even stricter: we must either be authenticated for admin db, or, if running without auth, on the local interface. @@ -68,7 +71,7 @@ /* Return true if slaves of a replication pair are allowed to execute the command (the command directly from a client -- if fromRepl, always allowed). 
*/ - virtual bool slaveOk() = 0; + virtual bool slaveOk() const = 0; /* Return true if the client can force a command to be run on a slave by turning on the 'slaveok' option in the command query. @@ -82,9 +85,7 @@ Note if run() returns false, we do NOT log. */ - virtual bool logTheOp() { - return false; - } + virtual bool logTheOp() { return false; } virtual void help( stringstream& help ) const; @@ -93,7 +94,11 @@ */ virtual bool requiresAuth() { return true; } - Command(const char *_name); + /** @param webUI expose the command in the web ui as localhost:28017/<name> + @param oldName an optional old, deprecated name for the command + */ + Command(const char *_name, bool webUI = false, const char *oldName = 0); + virtual ~Command() {} protected: @@ -105,9 +110,16 @@ return BSONObj(); } + static void logIfSlow( const Timer& cmdTimer, const string& msg); + static map<string,Command*> * _commands; + static map<string,Command*> * _commandsByBestName; + static map<string,Command*> * _webCommands; public: + static const map<string,Command*>* commandsByBestName() { return _commandsByBestName; } + static const map<string,Command*>* webCommands() { return _webCommands; } + /** @return if command was found and executed */ static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder); static LockType locktype( const string& name ); static Command * findCommand( const string& name ); diff -Nru mongodb-1.4.4/db/common.cpp mongodb-1.6.3/db/common.cpp --- mongodb-1.4.4/db/common.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/common.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,6 +1,21 @@ // common.cpp +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ -#include "stdafx.h" +#include "pch.h" #include "concurrency.h" /** @@ -9,6 +24,6 @@ namespace mongo { /* we use new here so we don't have to worry about destructor orders at program shutdown */ - MongoMutex &dbMutex( *(new MongoMutex) ); + MongoMutex &dbMutex( *(new MongoMutex("rw:dbMutex")) ); } diff -Nru mongodb-1.4.4/db/concurrency.h mongodb-1.6.3/db/concurrency.h --- mongodb-1.4.4/db/concurrency.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/concurrency.h 2010-09-24 10:02:42.000000000 -0700 @@ -29,25 +29,11 @@ #pragma once -#if BOOST_VERSION >= 103500 -#include <boost/thread/shared_mutex.hpp> -#undef assert -#define assert xassert -#define HAVE_READLOCK -#else -#warning built with boost version 1.34 or older - limited concurrency -#endif +#include "../util/concurrency/rwlock.h" +#include "../util/mmap.h" namespace mongo { - inline bool readLockSupported(){ -#ifdef HAVE_READLOCK - return true; -#else - return false; -#endif - } - string sayClientState(); bool haveClient(); @@ -87,11 +73,9 @@ } }; -#ifdef HAVE_READLOCK -//#if 0 class MongoMutex { MutexInfo _minfo; - boost::shared_mutex _m; + RWLock _m; ThreadLocalValue<int> _state; /* we use a separate TLS value for releasedEarly - that is ok as @@ -99,13 +83,16 @@ */ ThreadLocalValue<bool> _releasedEarly; public: + MongoMutex(const char * name) : _m(name) { } + /** * @return * > 0 write lock * = 0 no lock * < 0 read lock */ - int getState(){ return _state.get(); } + int getState() { return _state.get(); } + bool isWriteLocked() { return getState() > 0; } void assertWriteLocked() { assert( getState() > 0 ); DEV assert( !_releasedEarly.get() ); @@ -129,7 +116,6 @@ } void lock() { - if ( _checkWriteLockAlready() ) return; @@ -140,27 +126,28 @@ curopGotLock(); _minfo.entered(); + + MongoFile::lockAll(); } - bool lock_try() { + bool lock_try( int millis ) { if ( _checkWriteLockAlready() ) - return true; - - curopWaitingForLock( 1 ); + return true; - boost::system_time until = get_system_time(); - until += boost::posix_time::milliseconds(0); - bool got = _m.timed_lock( until ); + curopWaitingForLock( 1 ); + bool got = _m.lock_try( millis ); curopGotLock(); if ( got ){ _minfo.entered(); _state.set(1); + MongoFile::lockAll(); } return got; } - + + void unlock() { //DEV cout << "UNLOCK" << endl; int s = _state.get(); @@ -175,6 +162,9 @@ } massert( 12599, "internal error: attempt to unlock when wasn't in a write lock", false); } + + MongoFile::unlockAll(); + _state.set(0); _minfo.leaving(); _m.unlock(); @@ -218,10 +208,8 @@ lock_shared(); return true; } - - boost::system_time until = get_system_time(); - until += boost::posix_time::milliseconds(2); - bool got = _m.timed_lock_shared( until ); + + bool got = _m.lock_shared_try( millis ); if ( got ) _state.set(-1); return got; @@ -246,82 +234,11 @@ MutexInfo& info() { return _minfo; } }; -#else - /* this will be for old versions of boost */ - class MongoMutex { - MutexInfo _minfo; - boost::recursive_mutex m; - ThreadLocalValue<bool> _releasedEarly; - public: - MongoMutex() { } - void lock() { -#ifdef HAVE_READLOCK - m.lock(); -#error this should be impossible? 
-#else - boost::detail::thread::lock_ops::lock(m); -#endif - _minfo.entered(); - } - - bool lock_try(){ - lock(); - return true; - } - - void releaseEarly() { - assertWriteLocked(); // also must not be recursive, although we don't verify that in the old boost version - assert( !_releasedEarly.get() ); - _releasedEarly.set(true); - _unlock(); - } - - void _unlock() { - _minfo.leaving(); -#ifdef HAVE_READLOCK - m.unlock(); -#else - boost::detail::thread::lock_ops::unlock(m); -#endif - } - void unlock() { - if( _releasedEarly.get() ) { - _releasedEarly.set(false); - return; - } - _unlock(); - } - - void lock_shared() { lock(); } - bool lock_shared_try( int millis ) { - while ( millis-- ){ - if ( getState() ){ - sleepmillis(1); - continue; - } - lock_shared(); - return true; - } - return false; - } - - void unlock_shared() { unlock(); } - MutexInfo& info() { return _minfo; } - void assertWriteLocked() { - assert( info().isLocked() ); - } - void assertAtLeastReadLocked() { - assert( info().isLocked() ); - } - bool atLeastReadLocked() { return info().isLocked(); } - int getState(){ return info().isLocked() ? 1 : 0; } - }; -#endif extern MongoMutex &dbMutex; - void dbunlocking_write(); - void dbunlocking_read(); + inline void dbunlocking_write() { } + inline void dbunlocking_read() { } struct writelock { writelock(const string& ns) { @@ -357,29 +274,36 @@ dbMutex.unlock_shared(); } } - bool got(){ - return _got; - } + bool got() const { return _got; } + private: bool _got; }; struct writelocktry { - writelocktry( const string&ns ){ - _got = dbMutex.lock_try(); + writelocktry( const string&ns , int tryms ){ + _got = dbMutex.lock_try( tryms ); } ~writelocktry() { if ( _got ){ - dbunlocking_write(); + dbunlocking_read(); dbMutex.unlock(); } } - bool got(){ - return _got; - } + bool got() const { return _got; } + private: bool _got; }; - + struct readlocktryassert : public readlocktry { + readlocktryassert(const string& ns, int tryms) : + readlocktry(ns,tryms) { + uassert(13142, "timeout getting readlock", got()); + } + }; + + /** assure we have at least a read lock - the key with this being + if you have a write lock, that's ok too. 
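       A usage sketch (hypothetical caller): the guard only takes (and later
       releases) a shared lock when the thread holds nothing yet:
           void reportStats(const string& ns) {
               atleastreadlock lk(ns);   // no-op if already read- or write-locked
               // ... read-only work on ns ...
           }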
+ */ struct atleastreadlock { atleastreadlock( const string& ns ){ _prev = dbMutex.getState(); @@ -390,7 +314,7 @@ if ( _prev == 0 ) dbMutex.unlock_shared(); } - + private: int _prev; }; @@ -419,11 +343,9 @@ void releaseAndWriteLock(); }; - /* use writelock and readlock instead */ + /* use writelock and readlock instead */ struct dblock : public writelock { dblock() : writelock("") { } - ~dblock() { - } }; // eliminate diff -Nru mongodb-1.4.4/db/curop.h mongodb-1.6.3/db/curop.h --- mongodb-1.4.4/db/curop.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/curop.h 2010-09-24 10:02:42.000000000 -0700 @@ -20,11 +20,12 @@ #include "namespace.h" #include "client.h" -#include "../util/atomic_int.h" +#include "../bson/util/atomic_int.h" #include "db.h" namespace mongo { + /* lifespan is different than CurOp because of recursives with DBDirectClient */ class OpDebug { public: StringBuilder str; @@ -55,8 +56,7 @@ int _dbprofile; // 0=off, 1=slow, 2=all AtomicUInt _opNum; char _ns[Namespace::MaxNsLen+2]; - struct sockaddr_in _remote; - + struct SockAddr _remote; char _queryBuf[256]; void resetQuery(int x=0) { *((int *)_queryBuf) = x; } @@ -81,16 +81,11 @@ } public: + + int querySize() const { return *((int *) _queryBuf); } + bool haveQuery() const { return querySize() != 0; } - bool haveQuery() const { return *((int *) _queryBuf) != 0; } - - BSONObj query() { - if( *((int *) _queryBuf) == 1 ) { - return _tooBig; - } - BSONObj o(_queryBuf); - return o; - } + BSONObj query( bool threadSafe = false); void ensureStarted(){ if ( _start == 0 ) @@ -119,7 +114,7 @@ resetQuery(); } - void reset( const sockaddr_in & remote, int op ) { + void reset( const SockAddr & remote, int op ) { reset(); _remote = remote; _op = op; @@ -206,6 +201,10 @@ memcpy(_queryBuf, query.objdata(), query.objsize()); } + Client * getClient() const { + return _client; + } + CurOp( Client * client , CurOp * wrapped = 0 ) { _client = client; _wrapped = wrapped; @@ -223,10 +222,7 @@ memset(_queryBuf, 0, sizeof(_queryBuf)); } - ~CurOp(){ - if ( _wrapped ) - _client->_curOp = _wrapped; - } + ~CurOp(); BSONObj info() { if( ! cc().getAuthenticationInfo()->isAuthorized("admin") ) { @@ -237,28 +233,32 @@ return infoNoauth(); } - BSONObj infoNoauth(); + BSONObj infoNoauth( int attempt = 0 ); - string getRemoteString(){ - stringstream ss; - ss << inet_ntoa( _remote.sin_addr ) << ":" << ntohs( _remote.sin_port ); - return ss.str(); + string getRemoteString( bool includePort = true ){ + return _remote.toString(includePort); } ProgressMeter& setMessage( const char * msg , long long progressMeterTotal = 0 , int secondsBetween = 3 ){ - _message = msg; + if ( progressMeterTotal ){ - assert( ! _progressMeter.isActive() ); + if ( _progressMeter.isActive() ){ + cout << "about to assert, old _message: " << _message << " new message:" << msg << endl; + assert( ! _progressMeter.isActive() ); + } _progressMeter.reset( progressMeterTotal , secondsBetween ); } else { _progressMeter.finished(); } + + _message = msg; + return _progressMeter; } - - string getMessage() const { return _message; } - ProgressMeter getProgressMeter() { return _progressMeter; } + + string getMessage() const { return _message.toString(); } + ProgressMeter& getProgressMeter() { return _progressMeter; } friend class Client; }; diff -Nru mongodb-1.4.4/db/cursor.cpp mongodb-1.6.3/db/cursor.cpp --- mongodb-1.4.4/db/cursor.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/cursor.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -14,7 +14,7 @@ * along with this program. 
If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" #include "curop.h" diff -Nru mongodb-1.4.4/db/cursor.h mongodb-1.6.3/db/cursor.h --- mongodb-1.4.4/db/cursor.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/cursor.h 2010-09-24 10:02:42.000000000 -0700 @@ -16,14 +16,16 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "diskloc.h" +#include "matcher.h" namespace mongo { class Record; + class CoveredIndexMatcher; /* Query cursors, base class. This is for our internal cursors. "ClientCursor" is a separate concept and is for the user's cursor. @@ -31,13 +33,11 @@ WARNING concurrency: the vfunctions below are called back from within a ClientCursor::ccmutex. Don't cause a deadlock, you've been warned. */ - class Cursor { + class Cursor : boost::noncopyable { public: virtual ~Cursor() {} virtual bool ok() = 0; - bool eof() { - return !ok(); - } + bool eof() { return !ok(); } virtual Record* _current() = 0; virtual BSONObj current() = 0; virtual DiskLoc currLoc() = 0; @@ -78,10 +78,9 @@ virtual void checkLocation() { } virtual bool supportGetMore() = 0; - - virtual string toString() { - return "abstract?"; - } + virtual bool supportYields() = 0; + + virtual string toString() { return "abstract?"; } /* used for multikey index traversal to avoid sending back dups. see Matcher::matches(). if a multikey index traversal: @@ -93,10 +92,21 @@ */ virtual bool getsetdup(DiskLoc loc) = 0; - virtual BSONObj prettyIndexBounds() const { return BSONObj(); } + virtual BSONObj prettyIndexBounds() const { return BSONArray(); } virtual bool capped() const { return false; } + // The implementation may return different matchers depending on the + // position of the cursor. If matcher() is nonzero at the start, + // matcher() should be checked each time advance() is called. + virtual CoveredIndexMatcher *matcher() const { return 0; } + + // A convenience function for setting the value of matcher() manually + // so it may accessed later. Implementations which must generate + // their own matcher() should assert here. + virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { + massert( 13285, "manual matcher config not allowed", false ); + } }; // strategy object implementing direction of traversal. @@ -117,6 +127,7 @@ private: bool tailable_; + shared_ptr< CoveredIndexMatcher > _matcher; void init() { tailable_ = false; } @@ -161,6 +172,14 @@ virtual bool getsetdup(DiskLoc loc) { return false; } virtual bool supportGetMore() { return true; } + virtual bool supportYields() { return true; } + + virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } + + virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { + _matcher = matcher; + } + }; /* used for order { $natural: -1 } */ diff -Nru mongodb-1.4.4/db/database.cpp mongodb-1.6.3/db/database.cpp --- mongodb-1.4.4/db/database.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/database.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,14 +16,64 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" #include "database.h" +#include "instance.h" namespace mongo { bool Database::_openAllFiles = false; + Database::Database(const char *nm, bool& newDb, const string& _path ) + : name(nm), path(_path), namespaceIndex( path, name ) { + + { // check db name is valid + size_t L = strlen(nm); + uassert( 10028 , "db name is empty", L > 0 ); + uassert( 10029 , "bad db name [1]", *nm != '.' 
); + uassert( 10030 , "bad db name [2]", nm[L-1] != '.' ); + uassert( 10031 , "bad char(s) in db name", strchr(nm, ' ') == 0 ); + uassert( 10032 , "db name too long", L < 64 ); + } + + newDb = namespaceIndex.exists(); + profile = 0; + profileName = name + ".system.profile"; + + { + vector<string> others; + getDatabaseNames( others , path ); + + for ( unsigned i=0; i<others.size(); i++ ){ + + if ( strcasecmp( others[i].c_str() , nm ) ) + continue; + + if ( strcmp( others[i].c_str() , nm ) == 0 ) + continue; + + stringstream ss; + ss << "db already exists with different case other: [" << others[i] << "] me [" << nm << "]"; + uasserted( DatabaseDifferCaseCode , ss.str() ); + } + } + + // If already exists, open. Otherwise behave as if empty until + // there's a write, then open. + if ( ! newDb || cmdLine.defaultProfile ) { + namespaceIndex.init(); + if( _openAllFiles ) + openAllFiles(); + } + + magic = 781231; + } + + bool Database::validDBName( const string& ns ){ + if ( ns.size() == 0 || ns.size() > 64 ) + return false; + size_t good = strcspn( ns.c_str() , "/\\. \"" ); + return good == ns.size(); + } + + void Database::flushFiles( bool sync ){ + dbMutex.assertAtLeastReadLocked(); + for ( unsigned i=0; i<files.size(); i++ ){ + files[i]->flush( sync ); + } + } + } // namespace mongo diff -Nru mongodb-1.4.4/db/database.h mongodb-1.6.3/db/database.h --- mongodb-1.4.4/db/database.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/database.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,6 +22,7 @@ namespace mongo { + class ClientCursor; /** * Database represents a database @@ -32,33 +33,7 @@ public: static bool _openAllFiles; - Database(const char *nm, bool& newDb, const string& _path = dbpath) - : name(nm), path(_path), namespaceIndex( path, name ) { - - { // check db name is valid - size_t L = strlen(nm); - uassert( 10028 , "db name is empty", L > 0 ); - uassert( 10029 , "bad db name [1]", *nm != '.' ); - uassert( 10030 , "bad db name [2]", nm[L-1] != '.' ); - uassert( 10031 , "bad char(s) in db name", strchr(nm, ' ') == 0 ); - uassert( 10032 , "db name too long", L < 64 ); - } - - newDb = namespaceIndex.exists(); - profile = 0; - profileName = name + ".system.profile"; - - // If already exists, open. Otherwise behave as if empty until - // there's a write, then open. - if ( ! newDb || cmdLine.defaultProfile ) { - namespaceIndex.init(); - if( _openAllFiles ) - openAllFiles(); - - } - - magic = 781231; - } + Database(const char *nm, bool& newDb, const string& _path = dbpath); ~Database() { magic = 0; @@ -114,7 +89,7 @@ namespaceIndex.init(); if ( n < 0 || n >= DiskLoc::MaxFiles ) { out() << "getFile(): n=" << n << endl; -#if !defined(_RECSTORE) +#if 0 if( n >= RecCache::Base && n <= RecCache::Base+1000 ) massert( 10294 , "getFile(): bad file number - using recstore db w/nonrecstore db build?", false); #endif @@ -137,8 +112,8 @@ int minSize = 0; if ( n != 0 && files[ n - 1 ] ) minSize = files[ n - 1 ]->getHeader()->fileLength; - if ( sizeNeeded + MDFHeader::headerSize() > minSize ) - minSize = sizeNeeded + MDFHeader::headerSize(); + if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) + minSize = sizeNeeded + DataFileHeader::HeaderSize; try { p->open( fullNameString.c_str(), minSize, preallocateOnly ); } @@ -205,6 +180,17 @@ bool setProfilingLevel( int newLevel , string& errmsg ); void finishInit(); + + static bool validDBName( const string& ns ); + + long long fileSize(){ + long long size=0; + for (int n=0; exists(n); n++) + size += boost::filesystem::file_size( fileName(n) ); + return size; + } + + void flushFiles( bool sync ); vector<MongoDataFile*> files; string name; // "alleyinsider" @@ -212,6 +198,9 @@ NamespaceIndex namespaceIndex; int profile; // 0=off. 
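/* A sketch of what validDBName (declared above, defined in database.cpp)
   accepts and rejects -- the reject set is the strcspn() argument
   (slash, backslash, dot, space, double-quote) plus a 64-character cap:
       Database::validDBName("test")            // true
       Database::validDBName("foo.bar")         // false: '.' not allowed in a db name
       Database::validDBName("a b")             // false: spaces rejected
       Database::validDBName(string(65, 'x'))   // false: longer than 64 chars
*/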
string profileName; // "alleyinsider.system.profile" + + multimap ccByLoc; + int magic; // used for making sure the object is still loaded in memory }; diff -Nru mongodb-1.4.4/db/db_10.sln mongodb-1.6.3/db/db_10.sln --- mongodb-1.4.4/db/db_10.sln 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/db_10.sln 2010-09-24 10:02:42.000000000 -0700 @@ -1,45 +1,144 @@  Microsoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{4082881B-EB00-486F-906C-843B8EC06E18}" + ProjectSection(SolutionItems) = preProject + driverHelpers.cpp = driverHelpers.cpp + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{2B262D59-9DC7-4BF1-A431-1BD4966899A5}" + ProjectSection(SolutionItems) = preProject + ..\tools\bridge.cpp = ..\tools\bridge.cpp + ..\tools\bsondump.cpp = ..\tools\bsondump.cpp + ..\tools\dump.cpp = ..\tools\dump.cpp + ..\tools\export.cpp = ..\tools\export.cpp + ..\tools\import.cpp = ..\tools\import.cpp + ..\tools\restore.cpp = ..\tools\restore.cpp + ..\tools\sniffer.cpp = ..\tools\sniffer.cpp + ..\tools\stat.cpp = ..\tools\stat.cpp + ..\tools\tool.cpp = ..\tools\tool.cpp + ..\tools\tool.h = ..\tools\tool.h + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "unix files", "unix files", "{2F760952-C71B-4865-998F-AABAE96D1373}" + ProjectSection(SolutionItems) = preProject + ..\util\processinfo_darwin.cpp = ..\util\processinfo_darwin.cpp + ..\util\processinfo_linux2.cpp = ..\util\processinfo_linux2.cpp + ..\util\processinfo_none.cpp = ..\util\processinfo_none.cpp + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shell", "shell", "{407B4B88-3451-433C-B74F-31B31FEB5791}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "other", "other", "{12B11474-2D74-48C3-BB3D-F03249BEA88F}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongod", "db.vcxproj", "{215B2D68-0A70-4D10-8E75-B31010C62A91}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongos", "..\s\dbgrid.vcxproj", "{E03717ED-69B4-4D21-BC55-DF6690B585C6}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "..\dbtests\test.vcxproj", "{215B2D68-0A70-4D10-8E75-B33010C62A91}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bsondemo", "..\bson\bsondemo\bsondemo.vcxproj", "{C9DB5EB7-81AA-4185-BAA1-DA035654402F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongoutils test program", "..\util\mongoutils\mongoutils.vcxproj", "{7B84584E-92BC-4DB9-971B-A1A8F93E5053}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "jstests", "jstests", "{F5ABFB2C-A34F-48C1-9B5F-01D456AF6C57}" + ProjectSection(SolutionItems) = preProject + ..\jstests\index_many.js = ..\jstests\index_many.js + ..\jstests\indexapi.js = ..\jstests\indexapi.js + ..\jstests\objid5.js = ..\jstests\objid5.js + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug Recstore|Win32 = Debug Recstore|Win32 + Debug|Any CPU = Debug|Any CPU + Debug|Mixed Platforms = Debug|Mixed Platforms Debug|Win32 = Debug|Win32 - release_nojni|Win32 = release_nojni|Win32 + Debug|x64 = Debug|x64 + Release|Any CPU = Release|Any CPU + Release|Mixed Platforms = Release|Mixed Platforms Release|Win32 = Release|Win32 + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - 
{215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug Recstore|Win32.ActiveCfg = Debug Recstore|Win32 - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Any CPU.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Mixed Platforms.Build.0 = Debug|x64 {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32 {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.Build.0 = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B31010C62A91}.release_nojni|Win32.ActiveCfg = Release|Win32 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|x64.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|x64.Build.0 = Debug|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Any CPU.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Mixed Platforms.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Mixed Platforms.Build.0 = Release|x64 {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.ActiveCfg = Release|Win32 {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.Build.0 = Release|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug Recstore|Win32.ActiveCfg = Debug Recstore|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|x64.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|x64.Build.0 = Release|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Any CPU.ActiveCfg = Debug|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Mixed Platforms.Build.0 = Debug|x64 {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.ActiveCfg = Debug|Win32 {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.Build.0 = Debug|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.release_nojni|Win32.ActiveCfg = Release|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.release_nojni|Win32.Build.0 = Release|Win32 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|x64.ActiveCfg = Debug|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|x64.Build.0 = Debug|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Any CPU.ActiveCfg = Release|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Mixed Platforms.ActiveCfg = Release|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Mixed Platforms.Build.0 = Release|x64 {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.ActiveCfg = Release|Win32 {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.Build.0 = Release|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug Recstore|Win32.ActiveCfg = Debug Recstore|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|x64.ActiveCfg = Release|x64 + {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|x64.Build.0 = Release|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Any CPU.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Mixed Platforms.Build.0 = Debug|x64 {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32 {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.Build.0 = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.release_nojni|Win32.ActiveCfg = release_nojni|Win32 - 
{215B2D68-0A70-4D10-8E75-B33010C62A91}.release_nojni|Win32.Build.0 = release_nojni|Win32 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.ActiveCfg = Debug|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.Build.0 = Debug|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Any CPU.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Mixed Platforms.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Mixed Platforms.Build.0 = Release|x64 {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.ActiveCfg = Release|Win32 {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.Build.0 = Release|Win32 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.ActiveCfg = Release|x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.Build.0 = Release|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Any CPU.ActiveCfg = Debug|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Mixed Platforms.Build.0 = Debug|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.ActiveCfg = Debug|Win32 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.Build.0 = Debug|Win32 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|x64.ActiveCfg = Debug|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|x64.Build.0 = Debug|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Any CPU.ActiveCfg = Release|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Mixed Platforms.ActiveCfg = Release|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Mixed Platforms.Build.0 = Release|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.ActiveCfg = Release|Win32 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.Build.0 = Release|Win32 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|x64.ActiveCfg = Release|x64 + {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|x64.Build.0 = Release|x64 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|Any CPU.ActiveCfg = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|Mixed Platforms.Build.0 = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|Win32.ActiveCfg = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|Win32.Build.0 = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Debug|x64.ActiveCfg = Debug|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|Any CPU.ActiveCfg = Release|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|Mixed Platforms.ActiveCfg = Release|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|Mixed Platforms.Build.0 = Release|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|Win32.ActiveCfg = Release|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|Win32.Build.0 = Release|Win32 + {7B84584E-92BC-4DB9-971B-A1A8F93E5053}.Release|x64.ActiveCfg = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {2B262D59-9DC7-4BF1-A431-1BD4966899A5} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + {2F760952-C71B-4865-998F-AABAE96D1373} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + {407B4B88-3451-433C-B74F-31B31FEB5791} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + {4082881B-EB00-486F-906C-843B8EC06E18} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + {C9DB5EB7-81AA-4185-BAA1-DA035654402F} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + {7B84584E-92BC-4DB9-971B-A1A8F93E5053} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + 
{F5ABFB2C-A34F-48C1-9B5F-01D456AF6C57} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} + EndGlobalSection EndGlobal diff -Nru mongodb-1.4.4/db/dbcommands_admin.cpp mongodb-1.6.3/db/dbcommands_admin.cpp --- mongodb-1.4.4/db/dbcommands_admin.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/dbcommands_admin.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -22,7 +22,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "jsobj.h" #include "pdfile.h" #include "namespace.h" @@ -35,37 +35,20 @@ namespace mongo { - class FeaturesCmd : public Command { - public: - FeaturesCmd() : Command( "features" ){} - - virtual bool slaveOk(){ return true; } - virtual bool readOnly(){ return true; } - virtual LockType locktype(){ return READ; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - result.append( "readlock" , readLockSupported() ); - if ( globalScriptEngine ){ - BSONObjBuilder bb( result.subobjStart( "js" ) ); - result.append( "utf8" , globalScriptEngine->utf8Ok() ); - bb.done(); - } - return true; - } - - } featuresCmd; - class CleanCmd : public Command { public: CleanCmd() : Command( "clean" ){} - virtual bool slaveOk(){ return true; } - virtual LockType locktype(){ return WRITE; } + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return WRITE; } - bool run(const char *nsRaw, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - string dropns = cc().database()->name + "." + cmdObj.firstElement().valuestrsafe(); + virtual void help(stringstream& h) const { h << "internal"; } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe(); if ( !cmdLine.quiet ) - log() << "CMD: clean " << dropns << endl; + tlog() << "CMD: clean " << dropns << endl; NamespaceDetails *d = nsdetails(dropns.c_str()); @@ -87,18 +70,20 @@ public: ValidateCmd() : Command( "validate" ){} - virtual bool slaveOk(){ + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return WRITE; } + virtual void help(stringstream& h) const { h << "Validate contents of a namespace by scanning its data structures for correctness. Slow."; } + + virtual LockType locktype() const { return READ; } //{ validate: "collectionnamewithoutthedbpart" [, scandata: ] } */ - bool run(const char *nsRaw, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - string ns = cc().database()->name + "." + cmdObj.firstElement().valuestrsafe(); + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + string ns = dbname + "." + cmdObj.firstElement().valuestrsafe(); NamespaceDetails * d = nsdetails( ns.c_str() ); if ( !cmdLine.quiet ) - log() << "CMD: validate " << ns << endl; + tlog() << "CMD: validate " << ns << endl; if ( ! 
d ){ errmsg = "ns not found"; @@ -118,7 +103,7 @@ bool valid = true; stringstream ss; ss << "\nvalidate\n"; - ss << " details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl; + //ss << " details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl; if ( d->capped ) ss << " capped:" << d->capped << " max:" << d->max << '\n'; @@ -158,7 +143,7 @@ set<DiskLoc> recs; if( scanData ) { - auto_ptr<Cursor> c = theDataFileMgr.findAll(ns); + shared_ptr<Cursor> c = theDataFileMgr.findAll(ns); int n = 0; long long len = 0; long long nlen = 0; @@ -190,7 +175,7 @@ else ss << " (OK)"; ss << '\n'; } - ss << " " << n << " objects found, nobj:" << d->nrecords << "\n"; + ss << " " << n << " objects found, nobj:" << d->nrecords << '\n'; ss << " " << len << " bytes data w/headers\n"; ss << " " << nlen << " bytes data wout/headers\n"; } @@ -281,8 +266,8 @@ public: UnlockCommand() : Command( "unlock" ) { } virtual bool readOnly() { return true; } - virtual bool slaveOk(){ return true; } - virtual bool adminOnly(){ return true; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( lockedForWriting ) { log() << "command: unlock requested" << endl; @@ -304,6 +289,7 @@ class FSyncCommand : public Command { class LockDBJob : public BackgroundJob { protected: + string name() { return "lockdbjob"; } void run() { Client::initThread("fsyncjob"); Client& c = cc(); @@ -337,14 +323,15 @@ }; public: FSyncCommand() : Command( "fsync" ){} - virtual LockType locktype(){ return WRITE; } - virtual bool slaveOk(){ return true; } - virtual bool adminOnly(){ return true; } + virtual LockType locktype() const { return WRITE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } /*virtual bool localHostOnlyIfNoAuth(const BSONObj& cmdObj) { string x = cmdObj["exec"].valuestrsafe(); return !x.empty(); }*/ - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/fsync+Command"; } + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { /* async means do an fsync, but return immediately */ bool sync = ! cmdObj["async"].trueValue(); bool lock = cmdObj["lock"].trueValue(); @@ -376,18 +363,8 @@ } } fsyncCmd; + + - class LogRotateCmd : public Command { - public: - LogRotateCmd() : Command( "logRotate" ){} - virtual LockType locktype(){ return NONE; } - virtual bool slaveOk(){ return true; } - virtual bool adminOnly(){ return true; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - rotateLogs(); - return 1; - } - - } logRotateCmd; } diff -Nru mongodb-1.4.4/db/dbcommands.cpp mongodb-1.6.3/db/dbcommands.cpp --- mongodb-1.4.4/db/dbcommands.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/dbcommands.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,11 +15,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "stdafx.h" +#include "pch.h" #include "query.h" #include "pdfile.h" #include "jsobj.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include #include "introspect.h" #include "btree.h" @@ -28,7 +28,8 @@ #include "../util/processinfo.h" #include "json.h" #include "repl.h" -#include "replset.h" +#include "repl_block.h" +#include "replpair.h" #include "commands.h" #include "db.h" #include "instance.h" @@ -38,38 +39,13 @@ #include "../scripting/engine.h" #include "stats/counters.h" #include "background.h" +#include "../util/version.h" namespace mongo { - TicketHolder connTicketHolder( 20000 ); - extern int otherTraceLevel; void flushOpLog( stringstream &ss ); - class CmdShutdown : public Command { - public: - virtual bool requiresAuth() { return true; } - virtual bool adminOnly() { return true; } - virtual bool localHostOnlyIfNoAuth(const BSONObj& cmdObj) { return true; } - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() { - return true; - } - virtual LockType locktype(){ return WRITE; } - virtual void help( stringstream& help ) const { - help << "shutdown the database. must be ran against admin db and either (1) ran from localhost or (2) authenticated.\n"; - } - CmdShutdown() : Command("shutdown") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - cc().shutdown(); - log() << "terminating, shutdown command received" << endl; - dbexit( EXIT_CLEAN ); // this never returns - return true; - } - } cmdShutdown; - /* reset any errors so that getlasterror comes back clean. useful before performing a long series of operations where we want to @@ -78,19 +54,19 @@ */ class CmdResetError : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "reset error state (used with getpreverror)"; } - CmdResetError() : Command("reseterror") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + CmdResetError() : Command("resetError", false, "reseterror") {} + bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.get(); assert( le ); le->reset(); @@ -98,77 +74,90 @@ } } cmdResetError; - /* for diagnostic / testing purposes. */ - class CmdSleep : public Command { - public: - virtual LockType locktype(){ return READ; } - virtual bool adminOnly() { return true; } - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() { - return true; - } - virtual void help( stringstream& help ) const { - help << "internal / make db block for 100 seconds"; - } - CmdSleep() : Command("sleep") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - sleepsecs(100); - return true; - } - } cmdSleep; + /* set by replica sets if specified in the configuration. + a pointer is used to avoid any possible locking issues with lockless reading (see below locktype() is NONE + and would like to keep that) + (for now, it simply orphans any old copy as config changes should be extremely rare). + note: once non-null, never goes to null again. 
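For illustration only, not part of the patch: a minimal sketch of exercising the new w/wtimeout handling from the 1.6-era C++ driver (the host, namespace, and a two-member replica setup are assumptions, not taken from this diff):

    #include <iostream>
    #include "client/dbclient.h"
    using namespace std;
    using namespace mongo;

    int main() {
        DBClientConnection c;
        c.connect("localhost");                          // assumed host
        c.insert("test.people", BSON("name" << "joe"));  // assumed namespace
        BSONObj res;
        // block until 2 replicas acknowledge the write, or give up after 2000 ms
        c.runCommand("test", BSON("getLastError" << 1 << "w" << 2 << "wtimeout" << 2000), res);
        if (res["wtimeout"].trueValue())
            cout << "timed out waiting for slaves, waited " << res["waited"].numberInt() << "ms" << endl;
        return 0;
    }
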
+ */ + BSONObj *getLastErrorDefault = 0; class CmdGetLastError : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { - help << "return error status of the last operation"; + help << "return error status of the last operation on this connection"; } - CmdGetLastError() : Command("getlasterror") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + CmdGetLastError() : Command("getLastError", false, "getlasterror") {} + bool run(const string& dbnamne, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); if ( le->nPrev != 1 ) LastError::noError.appendSelf( result ); else le->appendSelf( result ); + Client& c = cc(); + c.appendLastOp( result ); + + BSONObj cmdObj = _cmdObj; + { + BSONObj::iterator i(_cmdObj); + i.next(); + if( !i.more() ) { + /* empty, use default */ + BSONObj *def = getLastErrorDefault; + if( def ) + cmdObj = *def; + } + } + if ( cmdObj["fsync"].trueValue() ){ log() << "fsync from getlasterror" << endl; result.append( "fsyncFiles" , MemoryMappedFile::flushAll( true ) ); } - return true; - } - } cmdGetLastError; + BSONElement e = cmdObj["w"]; + if ( e.isNumber() ){ + int timeout = cmdObj["wtimeout"].numberInt(); + Timer t; + + int w = e.numberInt(); + + long long passes = 0; + char buf[32]; + while ( 1 ){ + if ( opReplicatedEnough( c.getLastOp() , w ) ) + break; + + if ( timeout > 0 && t.millis() >= timeout ){ + result.append( "wtimeout" , true ); + errmsg = "timed out waiting for slaves"; + result.append( "waited" , t.millis() ); + return false; + } - /* for testing purposes only */ - class CmdForceError : public Command { - public: - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() { - return true; - } - virtual LockType locktype(){ return NONE; } - CmdForceError() : Command("forceerror") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - uassert( 10038 , "forced error", false); + assert( sprintf( buf , "w block pass: %lld" , ++passes ) < 30 ); + c.curop()->setMessage( buf ); + sleepmillis(1); + killCurrentOp.checkForInterrupt(); + } + result.appendNumber( "wtime" , t.millis() ); + } + return true; } - } cmdForceError; + } cmdGetLastError; class CmdGetPrevError : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool logTheOp() { return false; @@ -176,11 +165,11 @@ virtual void help( stringstream& help ) const { help << "check for errors since last reseterror commandcal"; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - CmdGetPrevError() : Command("getpreverror") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {} + bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); le->appendSelf( result ); if ( le->valid ) @@ -191,32 +180,6 @@ } } cmdGetPrevError; - class CmdSwitchToClientErrors : public Command { - public: - virtual bool 
requiresAuth() { return false; } - virtual bool logTheOp() { - return false; - } - virtual void help( stringstream& help ) const { - help << "convert to id based errors rather than connection based"; - } - virtual bool slaveOk() { - return true; - } - virtual LockType locktype(){ return NONE; } - CmdSwitchToClientErrors() : Command("switchtoclienterrors") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if ( lastError.getID() ){ - errmsg = "already in client id mode"; - return false; - } - LastError *le = lastError.disableForCommand(); - le->overridenById = true; - result << "ok" << 1; - return true; - } - } cmdSwitchToClientErrors; - class CmdDropDatabase : public Command { public: virtual bool logTheOp() { @@ -225,19 +188,19 @@ virtual void help( stringstream& help ) const { help << "drop (delete) this database"; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdDropDatabase() : Command("dropDatabase") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - BSONElement e = cmdObj.getField(name); - log() << "dropDatabase " << ns << endl; + bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + BSONElement e = cmdObj.firstElement(); + log() << "dropDatabase " << dbnamne << endl; int p = (int) e.number(); if ( p != 1 ) return false; - dropDatabase(ns); - result.append( "dropped" , ns ); + dropDatabase(dbnamne); + result.append( "dropped" , dbnamne ); return true; } } cmdDropDatabase; @@ -247,17 +210,17 @@ virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "repair database. also compacts. 
note: slow."; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdRepairDatabase() : Command("repairDatabase") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - BSONElement e = cmdObj.getField(name); - log() << "repairDatabase " << ns << endl; + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + BSONElement e = cmdObj.firstElement(); + log() << "repairDatabase " << dbname << endl; int p = (int) e.number(); if ( p != 1 ) return false; @@ -265,7 +228,7 @@ bool preserveClonedFilesOnFailure = e.isBoolean() && e.boolean(); e = cmdObj.getField( "backupOriginalFiles" ); bool backupOriginalFiles = e.isBoolean() && e.boolean(); - return repairDatabase( ns, errmsg, preserveClonedFilesOnFailure, backupOriginalFiles ); + return repairDatabase( dbname, errmsg, preserveClonedFilesOnFailure, backupOriginalFiles ); } } cmdRepairDatabase; @@ -275,19 +238,25 @@ */ class CmdProfile : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { - help << "enable or disable performance profiling"; + help << "enable or disable performance profiling\n"; + help << "{ profile : }\n"; + help << "0=off 1=log slow ops 2=log all\n"; + help << "http://www.mongodb.org/display/DOCS/Database+Profiler"; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdProfile() : Command("profile") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - BSONElement e = cmdObj.getField(name); - result.append("was", (double) cc().database()->profile); + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + BSONElement e = cmdObj.firstElement(); + result.append("was", cc().database()->profile); + result.append("slowms", cmdLine.slowMS ); + int p = (int) e.number(); bool ok = false; + if ( p == -1 ) ok = true; else if ( p >= 0 && p <= 2 ) { @@ -304,21 +273,29 @@ class CmdServerStatus : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - CmdServerStatus() : Command("serverStatus") { + CmdServerStatus() : Command("serverStatus", true) { started = time(0); } - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } + + virtual void help( stringstream& help ) const { + help << "returns lots of administrative server statistics"; + } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + long long start = Listener::getElapsedTimeMillis(); + BSONObjBuilder timeBuilder(128); + - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); result.append("version", versionString); result.append("uptime",(double) (time(0)-started)); + result.append("uptimeEstimate",(double) (start/1000)); result.appendDate( "localTime" , jsTime() ); { @@ -333,9 +310,18 @@ t.append("lockTime", tl); t.append("ratio", (tt ? 
tl/tt : 0)); + BSONObjBuilder ttt( t.subobjStart( "currentQueue" ) ); + int w=0, r=0; + Client::recommendedYieldMicros( &w , &r ); + ttt.append( "total" , w + r ); + ttt.append( "readers" , r ); + ttt.append( "writers" , w ); + ttt.done(); + result.append( "globalLock" , t.obj() ); } - + timeBuilder.appendNumber( "after basic" , Listener::getElapsedTimeMillis() - start ); + if ( authed ){ BSONObjBuilder t( result.subobjStart( "mem" ) ); @@ -358,6 +344,7 @@ t.done(); } + timeBuilder.appendNumber( "after is authed" , Listener::getElapsedTimeMillis() - start ); { BSONObjBuilder bb( result.subobjStart( "connections" ) ); @@ -365,6 +352,7 @@ bb.append( "available" , connTicketHolder.available() ); bb.done(); } + timeBuilder.appendNumber( "after connections" , Listener::getElapsedTimeMillis() - start ); if ( authed ){ BSONObjBuilder bb( result.subobjStart( "extra_info" ) ); @@ -372,26 +360,37 @@ ProcessInfo p; p.getExtraInfo(bb); bb.done(); + timeBuilder.appendNumber( "after extra info" , Listener::getElapsedTimeMillis() - start ); + } - { BSONObjBuilder bb( result.subobjStart( "indexCounters" ) ); globalIndexCounters.append( bb ); bb.done(); } - + { BSONObjBuilder bb( result.subobjStart( "backgroundFlushing" ) ); globalFlushCounters.append( bb ); bb.done(); } + { + BSONObjBuilder bb( result.subobjStart( "cursros" ) ); + ClientCursor::appendStats( bb ); + bb.done(); + } + + timeBuilder.appendNumber( "after counters" , Listener::getElapsedTimeMillis() - start ); + if ( anyReplEnabled() ){ BSONObjBuilder bb( result.subobjStart( "repl" ) ); appendReplicationInfo( bb , authed , cmdObj["repl"].numberInt() ); bb.done(); } + + timeBuilder.appendNumber( "after repl" , Listener::getElapsedTimeMillis() - start ); result.append( "opcounters" , globalOpCounters.getObj() ); @@ -405,44 +404,31 @@ asserts.done(); } + timeBuilder.appendNumber( "after asserts" , Listener::getElapsedTimeMillis() - start ); + if ( ! 
authed ) result.append( "note" , "run against admin for more info" ); + + if ( Listener::getElapsedTimeMillis() - start > 1000 ){ + BSONObj t = timeBuilder.obj(); + log() << "serverStatus was very slow: " << t << endl; + result.append( "timing" , t ); + } return true; } time_t started; } cmdServerStatus; - /* just to check if the db has asserted */ - class CmdAssertInfo : public Command { - public: - virtual bool slaveOk() { - return true; - } - virtual void help( stringstream& help ) const { - help << "check if any asserts have occurred on the server"; - } - virtual LockType locktype(){ return WRITE; } - CmdAssertInfo() : Command("assertinfo") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - result.appendBool("dbasserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet()); - result.appendBool("asserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet() || lastAssert[3].isSet()); - result.append("assert", lastAssert[AssertRegular].toString()); - result.append("assertw", lastAssert[AssertW].toString()); - result.append("assertmsg", lastAssert[AssertMsg].toString()); - result.append("assertuser", lastAssert[AssertUser].toString()); - return true; - } - } cmdAsserts; - class CmdGetOpTime : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return NONE; } + virtual void help( stringstream& help ) const { help << "internal"; } + virtual LockType locktype() const { return NONE; } CmdGetOpTime() : Command("getoptime") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { writelock l( "" ); result.appendDate("optime", OpTime::now().asDate()); return true; @@ -453,7 +439,7 @@ class Cmd : public Command { public: Cmd() : Command("") { } - bool adminOnly() { return true; } + bool adminOnly() const { return true; } bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result) { return true; } @@ -462,21 +448,22 @@ class CmdDiagLogging : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } CmdDiagLogging() : Command("diagLogging") { } - bool adminOnly() { + bool adminOnly() const { return true; } - virtual LockType locktype(){ return WRITE; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; } + virtual LockType locktype() const { return WRITE; } + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() ); stringstream ss; flushOpLog( ss ); out() << ss.str() << endl; if ( !cmdLine.quiet ) - log() << "CMD: diagLogging set to " << _diaglog.level << " from: " << was << endl; + tlog() << "CMD: diagLogging set to " << _diaglog.level << " from: " << was << endl; result.append( "was" , was ); return true; } @@ -584,18 +571,19 @@ virtual bool logTheOp() { return true; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return false; } - virtual LockType locktype(){ return WRITE; } - virtual bool run(const char *ns, BSONObj& 
cmdObj, string& errmsg, BSONObjBuilder& result, bool) { - string nsToDrop = cc().database()->name + '.' + cmdObj.getField(name).valuestr(); + virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : }"; } + virtual LockType locktype() const { return WRITE; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + string nsToDrop = dbname + '.' + cmdObj.firstElement().valuestr(); NamespaceDetails *d = nsdetails(nsToDrop.c_str()); if ( !cmdLine.quiet ) - log() << "CMD: drop " << nsToDrop << endl; + tlog() << "CMD: drop " << nsToDrop << endl; if ( d == 0 ) { errmsg = "ns not found"; return false; @@ -609,23 +597,24 @@ /* select count(*) */ class CmdCount : public Command { public: - virtual LockType locktype(){ return READ; } + virtual LockType locktype() const { return READ; } CmdCount() : Command("count") { } virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { // ok on --slave setups, not ok for nonmaster of a repl pair (unless override) return replSettings.slave == SimpleSlave; } virtual bool slaveOverrideOk() { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return false; } - virtual bool run(const char *_ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { - string ns = cc().database()->name + '.' + cmdObj.getField(name).valuestr(); + virtual void help( stringstream& help ) const { help << "count objects in collection"; } + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; long long n = runCount(ns.c_str(), cmdObj, err); long long nn = n; @@ -652,20 +641,20 @@ virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "create a collection"; } - virtual bool run(const char *_ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { - string ns = cc().database()->name + '.' + cmdObj.getField(name).valuestr(); + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; - bool ok = userCreateNS(ns.c_str(), cmdObj, err, true); + bool ok = userCreateNS(ns.c_str(), cmdObj, err, ! fromRepl ); if ( !ok && !err.empty() ) errmsg = err; return ok; @@ -678,20 +667,20 @@ virtual bool logTheOp() { return true; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "drop indexes for a collection"; } - CmdDropIndexes(const char *cmdname = "dropIndexes") : Command(cmdname) { } - bool run(const char *ns, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { - BSONElement e = jsobj.getField(name.c_str()); - string toDeleteNs = cc().database()->name + '.' 
+ e.valuestr(); + CmdDropIndexes() : Command("dropIndexes", false, "deleteIndexes") { } + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { + BSONElement e = jsobj.firstElement(); + string toDeleteNs = dbname + '.' + e.valuestr(); NamespaceDetails *d = nsdetails(toDeleteNs.c_str()); if ( !cmdLine.quiet ) - log() << "CMD: dropIndexes " << toDeleteNs << endl; + tlog() << "CMD: dropIndexes " << toDeleteNs << endl; if ( d ) { BSONElement f = jsobj.getField("index"); if ( f.type() == String ) { @@ -701,7 +690,7 @@ int idxId = d->findIndexByKeyPattern( f.embeddedObject() ); if ( idxId < 0 ){ errmsg = "can't find index with key:"; - errmsg += f.embeddedObject(); + errmsg += f.embeddedObject().toString(); return false; } else { @@ -721,33 +710,24 @@ } } } cmdDropIndexes; - class CmdDeleteIndexes : public CmdDropIndexes { - public: - CmdDeleteIndexes() : CmdDropIndexes("deleteIndexes") { } - } cmdDeleteIndexes; class CmdReIndex : public Command { public: - virtual bool logTheOp() { - return true; - } - virtual bool slaveOk() { - return false; - } - virtual LockType locktype(){ return WRITE; } + virtual bool logTheOp() { return false; } // only reindexes on the one node + virtual bool slaveOk() const { return true; } // can reindex on a secondary + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "re-index a collection"; } CmdReIndex() : Command("reIndex") { } - bool run(const char *ns, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { - BackgroundOperation::assertNoBgOpInProgForNs(ns); - + bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { static DBDirectClient db; - BSONElement e = jsobj.getField(name.c_str()); - string toDeleteNs = cc().database()->name + '.' + e.valuestr(); + BSONElement e = jsobj.firstElement(); + string toDeleteNs = dbname + '.' + e.valuestr(); NamespaceDetails *d = nsdetails(toDeleteNs.c_str()); - log() << "CMD: reIndex " << toDeleteNs << endl; + tlog() << "CMD: reIndex " << toDeleteNs << endl; + BackgroundOperation::assertNoBgOpInProgForNs(toDeleteNs.c_str()); if ( ! 
d ){ errmsg = "ns not found"; @@ -772,7 +752,7 @@ for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); i++ ){ BSONObj o = *i; - db.insert( Namespace( toDeleteNs.c_str() ).getSisterNS( "system.indexes" ).c_str() , o ); + theDataFileMgr.insertWithObjMod( Namespace( toDeleteNs.c_str() ).getSisterNS( "system.indexes" ).c_str() , o , true ); } result.append( "ok" , 1 ); @@ -784,21 +764,19 @@ class CmdListDatabases : public Command { public: - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual bool slaveOverrideOk() { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return READ; } + virtual void help( stringstream& help ) const { help << "list databases on this server"; } CmdListDatabases() : Command("listDatabases") {} - bool run(const char *ns, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { vector< string > dbNames; getDatabaseNames( dbNames ); vector< BSONObj > dbInfos; @@ -846,11 +824,13 @@ */ class CmdCloseAllDatabases : public Command { public: - virtual bool adminOnly() { return true; } - virtual bool slaveOk() { return false; } - virtual LockType locktype(){ return WRITE; } + virtual void help( stringstream& help ) const { help << "Close all database files.\nA new request will cause an immediate reopening; thus, this is mostly for testing purposes."; } + virtual bool adminOnly() const { return true; } + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } + CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {} - bool run(const char *ns, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { return dbHolder.closeAll( dbpath , result, false ); } } cmdCloseAllDatabases; @@ -858,17 +838,15 @@ class CmdFileMD5 : public Command { public: CmdFileMD5() : Command( "filemd5" ){} - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { - help << " example: { filemd5 : ObjectId(aaaaaaa) , key : { ts : 1 } }"; + help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } - virtual LockType locktype(){ return READ; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - static DBDirectClient db; - - string ns = nsToDatabase( dbname ); + virtual LockType locktype() const { return READ; } + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + string ns = dbname; ns += "."; { string root = jsobj.getStringField( "root" ); @@ -878,41 +856,71 @@ } ns += ".chunks"; // make this an option in jsobj - BSONObjBuilder query; - query.appendAs( jsobj["filemd5"] , "files_id" ); - Query q( query.obj() ); - q.sort( BSON( "files_id" << 1 << "n" << 1 ) ); - md5digest d; md5_state_t st; md5_init(&st); - dbtemprelease temp; + BSONObj query = BSON( "files_id" << jsobj["filemd5"] ); + BSONObj sort = BSON( "files_id" << 1 << "n" << 1 ); + + shared_ptr<Cursor> cursor = bestGuessCursor(ns.c_str(), query, sort); + scoped_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns.c_str())); - auto_ptr<DBClientCursor> cursor = db.query( ns.c_str() 
, q ); int n = 0; - while ( cursor->more() ){ - BSONObj c = cursor->next(); - int myn = c.getIntField( "n" ); - if ( n != myn ){ - log() << "should have chunk: " << n << " have:" << myn << endl; - uassert( 10040 , "chunks out of order" , n == myn ); + while ( cursor->ok() ){ + if ( ! cursor->matcher()->matchesCurrent( cursor.get() ) ){ + log() << "**** NOT MATCHING ****" << endl; + PRINT(cursor->current()); + cursor->advance(); + continue; } - int len; - const char * data = c["data"].binData( len ); - md5_append( &st , (const md5_byte_t*)(data + 4) , len - 4 ); + BSONObj obj = cursor->current(); + cursor->advance(); + + ClientCursor::YieldLock yield (cc); + try { + + BSONElement ne = obj["n"]; + assert(ne.isNumber()); + int myn = ne.numberInt(); + if ( n != myn ){ + log() << "should have chunk: " << n << " have:" << myn << endl; + + DBDirectClient client; + Query q(query); + q.sort(sort); + auto_ptr c = client.query(ns, q); + while(c->more()) + PRINT(c->nextSafe()); - n++; + uassert( 10040 , "chunks out of order" , n == myn ); + } + + int len; + const char * data = obj["data"].binDataClean( len ); + md5_append( &st , (const md5_byte_t*)(data) , len ); + + n++; + } catch (...) { + yield.relock(); // needed before yield goes out of scope + throw; + } + + if ( ! yield.stillOk() ){ + uasserted(13281, "File deleted during filemd5 command"); + } } + md5_finish(&st, d); + result.append( "numChunks" , n ); result.append( "md5" , digestToString( d ) ); return true; } } cmdFileMD5; - IndexDetails *cmdIndexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) { + static IndexDetails *cmdIndexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) { if ( ns[ 0 ] == '\0' || min.isEmpty() || max.isEmpty() ) { errmsg = "invalid command syntax (note: min and max are required)"; return 0; @@ -920,103 +928,96 @@ return indexDetailsForRange( ns, errmsg, min, max, keyPattern ); } - class CmdMedianKey : public Command { - public: - CmdMedianKey() : Command( "medianKey" ) {} - virtual bool slaveOk() { return true; } - virtual LockType locktype(){ return READ; } - virtual void help( stringstream &help ) const { - help << " example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n" - "NOTE: This command may take awhile to run"; - } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - const char *ns = jsobj.getStringField( "medianKey" ); - BSONObj min = jsobj.getObjectField( "min" ); - BSONObj max = jsobj.getObjectField( "max" ); - BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); - - Client::Context ctx( ns ); - - IndexDetails *id = cmdIndexDetailsForRange( ns, errmsg, min, max, keyPattern ); - if ( id == 0 ) - return false; - - Timer t; - int num = 0; - NamespaceDetails *d = nsdetails(ns); - int idxNo = d->idxNo(*id); - for( BtreeCursor c( d, idxNo, *id, min, max, false, 1 ); c.ok(); c.advance(), ++num ); - num /= 2; - BtreeCursor c( d, idxNo, *id, min, max, false, 1 ); - for( ; num; c.advance(), --num ); - int ms = t.millis(); - if ( ms > cmdLine.slowMS ) { - out() << "Finding median for index: " << keyPattern << " between " << min << " and " << max << " took " << ms << "ms." 
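For illustration only, not part of the patch: a minimal sketch of driving the reworked filemd5 command from the C++ driver (the host, database name, and the 24-hex-character files_id are assumptions):

    #include <iostream>
    #include "client/dbclient.h"
    using namespace std;
    using namespace mongo;

    int main() {
        DBClientConnection c;
        c.connect("localhost");                     // assumed host
        OID fileId;
        fileId.init("4c9c8e33f1d3a17b2c000001");    // assumed GridFS files_id
        BSONObj res;
        // md5 is computed server-side over the file's chunks; "fs" is the default root prefix
        c.runCommand("mydb", BSON("filemd5" << fileId << "root" << "fs"), res);
        cout << res["md5"].str() << " over " << res["numChunks"].numberInt() << " chunks" << endl;
        return 0;
    }
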
<< endl; - } - - if ( !c.ok() ) { - errmsg = "no index entries in the specified range"; - return false; - } - - result.append( "median", c.prettyKey( c.currKey() ) ); - return true; - } - } cmdMedianKey; - class CmdDatasize : public Command { public: - CmdDatasize() : Command( "datasize" ) {} - virtual bool slaveOk() { return true; } - virtual LockType locktype(){ return READ; } + CmdDatasize() : Command( "dataSize", false, "datasize" ) {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { help << - "\ndetermine data size for a set of data in a certain range" + "determine data size for a set of data in a certain range" "\nexample: { datasize:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }" "\nkeyPattern, min, and max parameters are optional." - "\nnot: This command may take a while to run"; + "\nnote: This command may take a while to run"; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - const char *ns = jsobj.getStringField( "datasize" ); + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + Timer timer; + + string ns = jsobj.firstElement().String(); BSONObj min = jsobj.getObjectField( "min" ); BSONObj max = jsobj.getObjectField( "max" ); BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); + bool estimate = jsobj["estimate"].trueValue(); Client::Context ctx( ns ); + NamespaceDetails *d = nsdetails(ns.c_str()); - auto_ptr< Cursor > c; + if ( ! d || d->nrecords == 0 ){ + result.appendNumber( "size" , 0 ); + result.appendNumber( "numObjects" , 0 ); + result.append( "millis" , timer.millis() ); + return true; + } + + result.appendBool( "estimate" , estimate ); + + shared_ptr c; if ( min.isEmpty() && max.isEmpty() ) { - c = theDataFileMgr.findAll( ns ); - } else if ( min.isEmpty() || max.isEmpty() ) { + if ( estimate ){ + result.appendNumber( "size" , d->datasize ); + result.appendNumber( "numObjects" , d->nrecords ); + result.append( "millis" , timer.millis() ); + return 1; + } + c = theDataFileMgr.findAll( ns.c_str() ); + } + else if ( min.isEmpty() || max.isEmpty() ) { errmsg = "only one of min or max specified"; return false; - } else { - IndexDetails *idx = cmdIndexDetailsForRange( ns, errmsg, min, max, keyPattern ); + } + else { + IndexDetails *idx = cmdIndexDetailsForRange( ns.c_str(), errmsg, min, max, keyPattern ); if ( idx == 0 ) return false; - NamespaceDetails *d = nsdetails(ns); + c.reset( new BtreeCursor( d, d->idxNo(*idx), *idx, min, max, false, 1 ) ); } + + long long avgObjSize = d->datasize / d->nrecords; + + long long maxSize = jsobj["maxSize"].numberLong(); + long long maxObjects = jsobj["maxObjects"].numberLong(); - Timer t; long long size = 0; long long numObjects = 0; while( c->ok() ) { - size += c->current().objsize(); - c->advance(); + + if ( estimate ) + size += avgObjSize; + else + size += c->currLoc().rec()->netLength(); + numObjects++; - } - int ms = t.millis(); - if ( ms > cmdLine.slowMS ) { - if ( min.isEmpty() ) { - out() << "Finding size for ns: " << ns << " took " << ms << "ms." << endl; - } else { - out() << "Finding size for ns: " << ns << " between " << min << " and " << max << " took " << ms << "ms." 
<< endl; + + if ( ( maxSize && size > maxSize ) || + ( maxObjects && numObjects > maxObjects ) ){ + result.appendBool( "maxReached" , true ); + break; } + + c->advance(); } - result.append( "size", (double)size ); - result.append( "numObjects" , (double)numObjects ); + ostringstream os; + os << "Finding size for ns: " << ns; + if ( ! min.isEmpty() ){ + os << " between " << min << " and " << max; + } + logIfSlow( timer , os.str() ); + + result.appendNumber( "size", size ); + result.appendNumber( "numObjects" , numObjects ); + result.append( "millis" , timer.millis() ); return true; } } cmdDatasize; @@ -1050,19 +1051,16 @@ class CollectionStats : public Command { public: - CollectionStats() : Command( "collstats" ) {} - virtual bool slaveOk() { return true; } - virtual LockType locktype(){ return READ; } + CollectionStats() : Command( "collStats", false, "collstats" ) {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << " example: { collstats:\"blog.posts\" } "; + help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024"; } - bool run(const char *dbname_c, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - string dbname = dbname_c; - if ( dbname.find( "." ) != string::npos ) - dbname = dbname.substr( 0 , dbname.find( "." ) ); - + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ string ns = dbname + "." + jsobj.firstElement().valuestr(); - + Client::Context cx( ns ); + NamespaceDetails * nsd = nsdetails( ns.c_str() ); if ( ! nsd ){ errmsg = "ns not found"; @@ -1072,11 +1070,23 @@ result.append( "ns" , ns.c_str() ); int scale = 1; - if ( jsobj["scale"].isNumber() ) + if ( jsobj["scale"].isNumber() ){ scale = jsobj["scale"].numberInt(); + if ( scale <= 0 ){ + errmsg = "scale has to be > 0"; + return false; + } + + } + else if ( jsobj["scale"].trueValue() ){ + errmsg = "scale has to be a number > 0"; + return false; + } + long long size = nsd->datasize / scale; result.appendNumber( "count" , nsd->nrecords ); - result.appendNumber( "size" , nsd->datasize / scale ); + result.appendNumber( "size" , size ); + result.append ( "avgObjSize" , double(size) / double(nsd->nrecords) ); int numExtents; result.appendNumber( "storageSize" , nsd->storageSize( &numExtents ) / scale ); result.append( "numExtents" , numExtents ); @@ -1098,22 +1108,19 @@ } } cmdCollectionStatis; - class DBStats : public Command { public: - DBStats() : Command( "dbstats" ) {} - virtual bool slaveOk() { return true; } - virtual LockType locktype(){ return READ; } + DBStats() : Command( "dbStats", false, "dbstats" ) {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { help << " example: { dbstats:1 } "; } - bool run(const char *dbname_c, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - string dbname = dbname_c; - if ( dbname.find( "." ) != string::npos ) - dbname = dbname.substr( 0 , dbname.find( "." 
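For illustration only, not part of the patch: a minimal sketch of the scale option documented above; with scale:1024 the size, storageSize, and avgObjSize figures come back in kilobytes (host and namespace are assumptions):

    #include <iostream>
    #include "client/dbclient.h"
    using namespace std;
    using namespace mongo;

    int main() {
        DBClientConnection c;
        c.connect("localhost");   // assumed host
        BSONObj res;
        // scale divides all reported sizes, so 1024 reports in KB
        c.runCommand("blog", BSON("collStats" << "posts" << "scale" << 1024), res);
        cout << "count: " << res["count"].numberLong()
             << "  size(KB): " << res["size"].numberLong() << endl;
        return 0;
    }
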
) ); - - DBDirectClient client; - const list collections = client.getCollectionNames(dbname); + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + list collections; + Database* d = cc().database(); + if ( d ) + d->namespaceIndex.getNamespaces( collections ); long long ncollections = 0; long long objects = 0; @@ -1128,8 +1135,9 @@ NamespaceDetails * nsd = nsdetails( ns.c_str() ); if ( ! nsd ){ - // should this assert here? - continue; + errmsg = "missing ns: "; + errmsg += ns; + return false; } ncollections += 1; @@ -1146,42 +1154,28 @@ result.appendNumber( "collections" , ncollections ); result.appendNumber( "objects" , objects ); + result.append ( "avgObjSize" , double(size) / double(objects) ); result.appendNumber( "dataSize" , size ); result.appendNumber( "storageSize" , storageSize); result.appendNumber( "numExtents" , numExtents ); result.appendNumber( "indexes" , indexes ); result.appendNumber( "indexSize" , indexSize ); + result.appendNumber( "fileSize" , d->fileSize() ); return true; } } cmdDBStats; - class CmdBuildInfo : public Command { - public: - CmdBuildInfo() : Command( "buildinfo" ) {} - virtual bool slaveOk() { return true; } - virtual bool adminOnly() { return true; } - virtual LockType locktype(){ return NONE; } - virtual void help( stringstream &help ) const { - help << "example: { buildinfo:1 }"; - } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo(); - result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 ); - return true; - } - } cmdBuildInfo; - /* convertToCapped seems to use this */ class CmdCloneCollectionAsCapped : public Command { public: CmdCloneCollectionAsCapped() : Command( "cloneCollectionAsCapped" ) {} - virtual bool slaveOk() { return false; } - virtual LockType locktype(){ return WRITE; } + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { - help << "example: { cloneCollectionAsCapped:, toCollection:, size: }"; + help << "{ cloneCollectionAsCapped:, toCollection:, size: }"; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ string from = jsobj.getStringField( "cloneCollectionAsCapped" ); string to = jsobj.getStringField( "toCollection" ); long long size = (long long)jsobj.getField( "size" ).number(); @@ -1191,11 +1185,8 @@ return false; } - char realDbName[256]; - nsToDatabase( dbname, realDbName ); - - string fromNs = string( realDbName ) + "." + from; - string toNs = string( realDbName ) + "." + to; + string fromNs = dbname + "." + from; + string toNs = dbname + "." 
+ to; NamespaceDetails *nsd = nsdetails( fromNs.c_str() ); massert( 10301 , "source collection " + fromNs + " does not exist", nsd ); long long excessSize = nsd->datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size' @@ -1209,9 +1200,8 @@ CursorId id; { - auto_ptr< Cursor > c = theDataFileMgr.findAll( fromNs.c_str(), startLoc ); - ClientCursor *cc = new ClientCursor(c, fromNs.c_str(), true); - cc->matcher.reset( new CoveredIndexMatcher( BSONObj(), fromjson( "{$natural:1}" ) ) ); + shared_ptr c = theDataFileMgr.findAll( fromNs.c_str(), startLoc ); + ClientCursor *cc = new ClientCursor(0, c, fromNs.c_str()); id = cc->cursorid; } @@ -1241,13 +1231,13 @@ class CmdConvertToCapped : public Command { public: CmdConvertToCapped() : Command( "convertToCapped" ) {} - virtual bool slaveOk() { return false; } - virtual LockType locktype(){ return WRITE; } + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { - help << "example: { convertToCapped:, size: }"; + help << "{ convertToCapped:, size: }"; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - BackgroundOperation::assertNoBgOpInProgForDb(dbname); + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str()); string from = jsobj.getStringField( "convertToCapped" ); long long size = (long long)jsobj.getField( "size" ).number(); @@ -1257,29 +1247,27 @@ return false; } - char realDbName[256]; - nsToDatabase( dbname, realDbName ); - DBDirectClient client; - client.dropCollection( string( realDbName ) + "." + from + ".$temp_convertToCapped" ); + client.dropCollection( dbname + "." + from + ".$temp_convertToCapped" ); BSONObj info; - if ( !client.runCommand( realDbName, + if ( !client.runCommand( dbname , BSON( "cloneCollectionAsCapped" << from << "toCollection" << ( from + ".$temp_convertToCapped" ) << "size" << double( size ) ), info ) ) { - errmsg = "cloneCollectionAsCapped failed: " + string(info); + errmsg = "cloneCollectionAsCapped failed: " + info.toString(); return false; } - if ( !client.dropCollection( string( realDbName ) + "." + from ) ) { + if ( !client.dropCollection( dbname + "." + from ) ) { errmsg = "failed to drop original collection"; return false; } if ( !client.runCommand( "admin", - BSON( "renameCollection" << ( string( realDbName ) + "." + from + ".$temp_convertToCapped" ) << "to" << ( string( realDbName ) + "." + from ) ), + BSON( "renameCollection" << ( dbname + "." + from + ".$temp_convertToCapped" ) + << "to" << ( dbname + "." 
+ from ) ), info ) ) { - errmsg = "renameCollection failed: " + string(info); + errmsg = "renameCollection failed: " + info.toString(); return false; } @@ -1290,10 +1278,11 @@ class GroupCommand : public Command { public: GroupCommand() : Command("group"){} - virtual LockType locktype(){ return READ; } - virtual bool slaveOk() { return true; } + virtual LockType locktype() const { return READ; } + virtual bool slaveOk() const { return true; } + virtual bool slaveOverrideOk() { return true; } virtual void help( stringstream &help ) const { - help << "see http://www.mongodb.org/display/DOCS/Aggregation"; + help << "http://www.mongodb.org/display/DOCS/Aggregation"; } BSONObj getKey( const BSONObj& obj , const BSONObj& keyPattern , ScriptingFunction func , double avgSize , Scope * s ){ @@ -1309,7 +1298,7 @@ return obj.extractFields( keyPattern , true ); } - bool group( string realdbname , auto_ptr cursor , + bool group( string realdbname , const string& ns , const BSONObj& query , BSONObj keyPattern , string keyFunctionCode , string reduceCode , const char * reduceScope , BSONObj initial , string finalize , string& errmsg , BSONObjBuilder& result ){ @@ -1349,8 +1338,17 @@ map map; list blah; - while ( cursor->more() ){ - BSONObj obj = cursor->next(); + shared_ptr cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); + + while ( cursor->ok() ){ + if ( cursor->matcher() && ! cursor->matcher()->matchesCurrent( cursor.get() ) ){ + cursor->advance(); + continue; + } + + BSONObj obj = cursor->current(); + cursor->advance(); + BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() ); keysize += key.objsize(); keynum++; @@ -1392,8 +1390,7 @@ return true; } - bool run(const char *dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - static DBDirectClient db; + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ /* db.$cmd.findOne( { group :

} ) */ const BSONObj& p = jsobj.firstElement().embeddedObjectUserCheck(); @@ -1406,18 +1403,12 @@ else q = getQuery( p ); - string ns = dbname; - ns = ns.substr( 0 , ns.size() - 4 ); - string realdbname = ns.substr( 0 , ns.size() - 1 ); - if ( p["ns"].type() != String ){ errmsg = "ns has to be set"; return false; } - - ns += p["ns"].valuestr(); - - auto_ptr<DBClientCursor> cursor = db.query( ns , q ); + + string ns = dbname + "." + p["ns"].String(); BSONObj key; string keyf; @@ -1429,7 +1420,7 @@ } } else if ( p["$keyf"].type() ){ - keyf = p["$keyf"].ascode(); + keyf = p["$keyf"]._asCode(); } else { // no key specified, will use entire object as key @@ -1450,10 +1441,10 @@ string finalize; if (p["finalize"].type()) - finalize = p["finalize"].ascode(); + finalize = p["finalize"]._asCode(); - return group( realdbname , cursor , - key , keyf , reduce.ascode() , reduce.type() != CodeWScope ? 0 : reduce.codeWScopeScopeData() , + return group( dbname , ns , q , + key , keyf , reduce._asCode() , reduce.type() != CodeWScope ? 0 : reduce.codeWScopeScopeData() , initial.embeddedObject() , finalize , errmsg , result ); } @@ -1464,44 +1455,44 @@ class DistinctCommand : public Command { public: DistinctCommand() : Command("distinct"){} - virtual bool slaveOk() { return true; } - virtual LockType locktype(){ return READ; } + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << "{ distinct : 'collection name' , key : 'a.b' }"; + help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - bool run(const char *dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - static DBDirectClient db; + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + string ns = dbname + '.' + cmdObj.firstElement().valuestr(); - string ns = cc().database()->name + '.' 
+ cmdObj.getField(name).valuestr(); string key = cmdObj["key"].valuestrsafe(); - BSONObj keyPattern = BSON( key << 1 ); - set<BSONObj,BSONObjCmp> map; - - long long size = 0; + BSONObj query = getQuery( cmdObj ); + + BSONElementSet values; + shared_ptr<Cursor> cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); + scoped_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); - auto_ptr<DBClientCursor> cursor = db.query( ns , getQuery( cmdObj ) , 0 , 0 , &keyPattern ); - while ( cursor->more() ){ - BSONObj o = cursor->next(); - BSONObj value = o.extractFields( keyPattern ); - if ( value.isEmpty() ) - continue; - if ( map.insert( value ).second ){ - size += o.objsize() + 20; - uassert( 10044 , "distinct too big, 4mb cap" , size < 4 * 1024 * 1024 ); + while ( cursor->ok() ){ + if ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() ) ){ + BSONObj o = cursor->current(); + o.getFieldsDotted( key, values ); } + + cursor->advance(); + + if (!cc->yieldSometimes()) + break; } - assert( size <= 0x7fffffff ); - BSONObjBuilder b( (int) size ); - int n=0; - for ( set<BSONObj,BSONObjCmp>::iterator i = map.begin() ; i != map.end(); i++ ){ - b.appendAs( i->firstElement() , b.numStr( n++ ).c_str() ); + BSONArrayBuilder b( result.subarrayStart( "values" ) ); + for ( BSONElementSet::iterator i = values.begin() ; i != values.end(); i++ ){ + b.append( *i ); } + BSONObj arr = b.done(); - result.appendArray( "values" , b.obj() ); + uassert(10044, "distinct too big, 4mb cap", + (arr.objsize() + 1024) < (4 * 1024 * 1024)); return true; } @@ -1511,48 +1502,88 @@ /* Find and Modify an object returning either the old (default) or new value*/ class CmdFindAndModify : public Command { public: - /* {findandmodify: "collection", query: {processed:false}, update: {$set: {processed:true}}, new: true} - * {findandmodify: "collection", query: {processed:false}, remove: true, sort: {priority:-1}} - * - * either update or remove is required, all other fields have default values - * output is in the "value" field - */ - CmdFindAndModify() : Command("findandmodify") { } + virtual void help( stringstream &help ) const { + help << + "{ findandmodify: \"collection\", query: {processed:false}, update: {$set: {processed:true}}, new: true}\n" + "{ findandmodify: \"collection\", query: {processed:false}, remove: true, sort: {priority:-1}}\n" + "Either update or remove is required, all other fields have default values.\n" + "Output is in the \"value\" field\n"; + } + + CmdFindAndModify() : Command("findAndModify", false, "findandmodify") { } virtual bool logTheOp() { return false; // the modification will be logged directly } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } - virtual bool run(const char *dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual LockType locktype() const { return WRITE; } + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { static DBDirectClient db; - string ns = nsToDatabase(dbname) + '.' + cmdObj.firstElement().valuestr(); + string ns = dbname + '.' 
@@ -1511,48 +1502,88 @@ /* Find and Modify an object returning either the old (default) or new value*/ class CmdFindAndModify : public Command { public: - /* {findandmodify: "collection", query: {processed:false}, update: {$set: {processed:true}}, new: true} - * {findandmodify: "collection", query: {processed:false}, remove: true, sort: {priority:-1}} - * - * either update or remove is required, all other fields have default values - * output is in the "value" field - */ - CmdFindAndModify() : Command("findandmodify") { } + virtual void help( stringstream &help ) const { + help << + "{ findandmodify: \"collection\", query: {processed:false}, update: {$set: {processed:true}}, new: true}\n" + "{ findandmodify: \"collection\", query: {processed:false}, remove: true, sort: {priority:-1}}\n" + "Either update or remove is required, all other fields have default values.\n" + "Output is in the \"value\" field\n"; + } + + CmdFindAndModify() : Command("findAndModify", false, "findandmodify") { } virtual bool logTheOp() { return false; // the modification will be logged directly } - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } - virtual LockType locktype(){ return WRITE; } - virtual bool run(const char *dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual LockType locktype() const { return WRITE; } + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { static DBDirectClient db; - string ns = nsToDatabase(dbname) + '.' + cmdObj.firstElement().valuestr(); + string ns = dbname + '.' + cmdObj.firstElement().valuestr(); - Query q (cmdObj.getObjectField("query")); // defaults to {} + BSONObj origQuery = cmdObj.getObjectField("query"); // defaults to {} + Query q (origQuery); BSONElement sort = cmdObj["sort"]; if (!sort.eoo()) q.sort(sort.embeddedObjectUserCheck()); - BSONObj out = db.findOne(ns, q); - if (out.firstElement().eoo()){ - errmsg = "No matching object found"; - return false; - } + bool upsert = cmdObj["upsert"].trueValue(); - q = QUERY( "_id" << out["_id"]); + BSONObj fieldsHolder (cmdObj.getObjectField("fields")); + const BSONObj* fields = (fieldsHolder.isEmpty() ? NULL : &fieldsHolder); + + BSONObj out = db.findOne(ns, q, fields); + if (out.isEmpty()){ + if (!upsert){ + errmsg = "No matching object found"; + return false; + } - if (cmdObj["remove"].trueValue()){ - uassert(12515, "can't remove and update", cmdObj["update"].eoo()); - db.remove(ns, q, 1); - } else { BSONElement update = cmdObj["update"]; - uassert(12516, "must specify remove or update", !update.eoo()); - db.update(ns, q, update.embeddedObjectUserCheck()); + uassert(13329, "upsert mode requires update field", !update.eoo()); + uassert(13330, "upsert mode requires query field", !origQuery.isEmpty()); + db.update(ns, origQuery, update.embeddedObjectUserCheck(), true); + + if (cmdObj["new"].trueValue()){ + BSONObj gle = db.getLastErrorDetailed(); + + BSONElement _id = gle["upserted"]; + if (_id.eoo()) + _id = origQuery["_id"]; - if (cmdObj["new"].trueValue()) - out = db.findOne(ns, q); + out = db.findOne(ns, QUERY("_id" << _id), fields); + } + + } else { + + Query idQuery = QUERY( "_id" << out["_id"]); + + if (cmdObj["remove"].trueValue()){ + uassert(12515, "can't remove and update", cmdObj["update"].eoo()); + db.remove(ns, idQuery, 1); + + } else { // update + + // need to include original query for $ positional operator + BSONObjBuilder b; + b.append(out["_id"]); + BSONObjIterator it(origQuery); + while (it.more()){ + BSONElement e = it.next(); + if (strcmp(e.fieldName(), "_id")) + b.append(e); + } + q = Query(b.obj()); + + BSONElement update = cmdObj["update"]; + uassert(12516, "must specify remove or update", !update.eoo()); + db.update(ns, q, update.embeddedObjectUserCheck()); + + if (cmdObj["new"].trueValue()) + out = db.findOne(ns, idQuery, fields); + } } result.append("value", out);
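The reworked findandmodify adds upsert, field selection, and $-positional support. The two documented forms, as a hedged driver-side sketch (the "queue"/"processed"/"priority" names are illustrative):

    BSONObj res;
    // update form: returns the post-update document because new:true
    conn.runCommand("test", BSON("findandmodify" << "queue"
                                 << "query" << BSON("processed" << false)
                                 << "update" << BSON("$set" << BSON("processed" << true))
                                 << "new" << true), res);
    BSONObj changed = res.getObjectField("value");
    // remove form: takes the highest-priority match and deletes it
    conn.runCommand("test", BSON("findandmodify" << "queue"
                                 << "query" << BSON("processed" << false)
                                 << "remove" << true
                                 << "sort" << BSON("priority" << -1)), res);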
@@ -1565,20 +1596,17 @@ class CmdWhatsMyUri : public Command { public: CmdWhatsMyUri() : Command("whatsmyuri") { } - virtual bool logTheOp() { - return false; // the modification will be logged directly - } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual void help( stringstream &help ) const { help << "{whatsmyuri:1}"; } - virtual bool run(const char *dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { BSONObj info = cc().curop()->infoNoauth(); result << "you" << info[ "client" ]; return true; @@ -1592,35 +1620,36 @@ virtual bool logTheOp() { return true; } - virtual bool slaveOk() { return false; } - virtual LockType locktype() { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } virtual void help( stringstream &help ) const { - help << "[for testing only]"; + help << "internal. for testing only."; } - virtual bool run(const char *dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "godinsert" ].valuestrsafe(); uassert( 13049, "godinsert must specify a collection", !coll.empty() ); - string ns = nsToDatabase( dbname ) + "." + coll; + string ns = dbname + "." + coll; BSONObj obj = cmdObj[ "obj" ].embeddedObjectUserCheck(); - DiskLoc loc = theDataFileMgr.insert( ns.c_str(), obj, true ); + DiskLoc loc = theDataFileMgr.insertWithObjMod( ns.c_str(), obj, true ); return true; } } cmdGodInsert; class DBHashCmd : public Command { public: - DBHashCmd() : Command( "dbhash" ){} - virtual bool slaveOk() { return true; } - virtual LockType locktype() { return READ; } - virtual bool run(const char * badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbname = nsToDatabase( badns ); - - list<string> colls = _db.getCollectionNames( dbname ); + DBHashCmd() : Command( "dbHash", false, "dbhash" ){} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + list<string> colls; + Database* db = cc().database(); + if ( db ) + db->namespaceIndex.getNamespaces( colls ); colls.sort(); result.appendNumber( "numCollections" , (long long)colls.size() ); @@ -1634,9 +1663,22 @@ if ( c.find( ".system.profil" ) != string::npos ) continue; - auto_ptr<Cursor> cursor; + shared_ptr<Cursor> cursor; NamespaceDetails * nsd = nsdetails( c.c_str() ); + + // debug SERVER-761 + NamespaceDetails::IndexIterator ii = nsd->ii(); + while( ii.more() ) { + const IndexDetails &idx = ii.next(); + if ( !idx.head.isValid() || !idx.info.isValid() ) { + log() << "invalid index for ns: " << c << " " << idx.head << " " << idx.info; + if ( idx.info.isValid() ) + log() << " " << idx.info.obj(); + log() << endl; + } + } + int idNum = nsd->findIdIndex(); if ( idNum >= 0 ){ cursor.reset( new BtreeCursor( nsd , idNum , nsd->idx( idNum ) , BSONObj() , BSONObj() , false , 1 ) ); @@ -1682,8 +1724,87 @@ return 1; } - DBDirectClient _db; } dbhashCmd;
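dbHash (old name dbhash) hashes every collection in the current database by walking its _id index, which is why the new SERVER-761 block above logs any invalid index heads before scanning. A hedged sketch of comparing two replica members (host names are placeholders; beyond numCollections, which is appended in this hunk, treat the exact result fields as version-dependent):

    BSONObj a, b;
    DBClientConnection m1, m2;
    m1.connect("host1");
    m2.connect("host2");
    m1.runCommand("test", BSON("dbhash" << 1), a);
    m2.runCommand("test", BSON("dbhash" << 1), b);
    cout << "collections: " << a["numCollections"] << endl;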
+ + /* for diagnostic / testing purposes. */ + class CmdSleep : public Command { + public: + virtual LockType locktype() const { return NONE; } + virtual bool adminOnly() const { return true; } + virtual bool logTheOp() { return false; } + virtual bool slaveOk() const { return true; } + virtual void help( stringstream& help ) const { + help << "internal testing command. Makes db block (in a read lock) for 100 seconds\n"; + help << "w:true write lock"; + } + CmdSleep() : Command("sleep") { } + bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( cmdObj.getBoolField("w") ) { + writelock lk(""); + sleepsecs(100); + } + else { + readlock lk(""); + sleepsecs(100); + } + return true; + } + } cmdSleep; + + class AvailableQueryOptions : public Command { + public: + AvailableQueryOptions() : Command( "availablequeryoptions" ){} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { return false; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + result << "options" << QueryOption_AllSupported; + return true; + } + } availableQueryOptionsCmd; + + // just for testing + class CapTrunc : public Command { + public: + CapTrunc() : Command( "captrunc" ){} + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } + virtual bool requiresAuth() { return true; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string coll = cmdObj[ "captrunc" ].valuestrsafe(); + uassert( 13416, "captrunc must specify a collection", !coll.empty() ); + string ns = dbname + "." + coll; + int n = cmdObj.getIntField( "n" ); + bool inc = cmdObj.getBoolField( "inc" ); + NamespaceDetails *nsd = nsdetails( ns.c_str() ); + ReverseCappedCursor c( nsd ); + massert( 13417, "captrunc invalid collection", c.ok() ); + for( int i = 0; i < n; ++i ) { + massert( 13418, "captrunc invalid n", c.advance() ); + } + DiskLoc end = c.currLoc(); + nsd->cappedTruncateAfter( ns.c_str(), end, inc ); + return true; + } + } capTruncCmd; + + // just for testing + class EmptyCapped : public Command { + public: + EmptyCapped() : Command( "emptycapped" ){} + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } + virtual bool requiresAuth() { return true; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string coll = cmdObj[ "emptycapped" ].valuestrsafe(); + uassert( 13428, "emptycapped must specify a collection", !coll.empty() ); + string ns = dbname + "." + coll; + NamespaceDetails *nsd = nsdetails( ns.c_str() ); + massert( 13429, "emptycapped no such collection", nsd ); + nsd->emptyCappedCollection( ns.c_str() ); + return true; + } + } emptyCappedCmd; /** * this handles @@ -1694,11 +1815,11 @@ */ bool execCommand( Command * c , Client& client , int queryOptions , - const char *ns, BSONObj& cmdObj , + const char *cmdns, BSONObj& cmdObj , BSONObjBuilder& result, bool fromRepl ){ - string dbname = nsToDatabase( ns ); + string dbname = nsToDatabase( cmdns ); AuthenticationInfo *ai = client.getAuthenticationInfo(); @@ -1711,7 +1832,7 @@ if ( c->adminOnly() && ! 
fromRepl && dbname != "admin" ) { - result.append( "errmsg" , "access denied" ); + result.append( "errmsg" , "access denied; use admin db" ); log() << "command denied: " << cmdObj.toString() << endl; return false; } @@ -1735,11 +1856,14 @@ result.append( "errmsg" , "not master" ); return false; } + + if ( c->adminOnly() ) + log( 2 ) << "command: " << cmdObj << endl; if ( c->locktype() == Command::NONE ){ // we also trust that this won't crash string errmsg; - int ok = c->run( ns , cmdObj , errmsg , result , fromRepl ); + int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); if ( ! ok ) result.append( "errmsg" , errmsg ); return ok; @@ -1747,40 +1871,30 @@ bool needWriteLock = c->locktype() == Command::WRITE; - if ( ! c->requiresAuth() && - ( ai->isAuthorizedReads( dbname ) && - ! ai->isAuthorized( dbname ) ) ){ - // this means that they can read, but not write - // so only get a read lock - needWriteLock = false; - } - if ( ! needWriteLock ){ assert( ! c->logTheOp() ); } mongolock lk( needWriteLock ); - Client::Context ctx( ns , dbpath , &lk , c->requiresAuth() ); - - if ( c->adminOnly() ) - log( 2 ) << "command: " << cmdObj << endl; + Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() ); try { string errmsg; - if ( ! c->run(ns, cmdObj, errmsg, result, fromRepl ) ){ + if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ){ result.append( "errmsg" , errmsg ); return false; } } - catch ( AssertionException& e ){ + catch ( DBException& e ){ stringstream ss; - ss << "assertion: " << e.what(); + ss << "exception: " << e.what(); result.append( "errmsg" , ss.str() ); + result.append( "code" , e.getCode() ); return false; } if ( c->logTheOp() && ! fromRepl ){ - logOp("c", ns, cmdObj); + logOp("c", cmdns, cmdObj); } return true; @@ -1795,6 +1909,7 @@ returns true if ran a cmd */ bool _runCommands(const char *ns, BSONObj& _cmdobj, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) { + cc().curop()->ensureStarted(); string dbname = nsToDatabase( ns ); if( logLevel >= 1 ) @@ -1821,6 +1936,7 @@ BSONElement e = jsobj.firstElement(); Command * c = e.type() ? Command::findCommand( e.fieldName() ) : 0; + if ( c ){ ok = execCommand( c , client , queryOptions , ns , jsobj , anObjBuilder , fromRepl ); } @@ -1828,10 +1944,14 @@ anObjBuilder.append("errmsg", "no such cmd"); anObjBuilder.append("bad cmd" , _cmdobj ); } + + // switch to bool, but wait a bit longer before switching? + // anObjBuilder.append("ok", ok); anObjBuilder.append("ok", ok?1.0:0.0); BSONObj x = anObjBuilder.done(); - b.append((void*) x.objdata(), x.objsize()); + b.appendBuf((void*) x.objdata(), x.objsize()); + return true; } - + } // namespace mongo diff -Nru mongodb-1.4.4/db/dbcommands_generic.cpp mongodb-1.6.3/db/dbcommands_generic.cpp --- mongodb-1.4.4/db/dbcommands_generic.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/dbcommands_generic.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,230 @@ +// dbcommands_generic.cpp + +/** +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * commands suited for any mongo server + */ + +#include "pch.h" +#include "query.h" +#include "pdfile.h" +#include "jsobj.h" +#include "../bson/util/builder.h" +#include <time.h> +#include "introspect.h" +#include "btree.h" +#include "../util/lruishmap.h" +#include "../util/md5.hpp" +#include "../util/processinfo.h" +#include "json.h" +#include "repl.h" +#include "repl_block.h" +#include "replpair.h" +#include "commands.h" +#include "db.h" +#include "instance.h" +#include "lasterror.h" +#include "security.h" +#include "queryoptimizer.h" +#include "../scripting/engine.h" +#include "stats/counters.h" +#include "background.h" +#include "../util/version.h" + +namespace mongo { + + class CmdBuildInfo : public Command { + public: + CmdBuildInfo() : Command( "buildInfo", true, "buildinfo" ) {} + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream &help ) const { + help << "get version #, etc.\n"; + help << "{ buildinfo:1 }"; + } + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo(); + result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 ); + result.appendBool( "debug" , +#ifdef _DEBUG + true +#else + false +#endif + ); + return true; + } + } cmdBuildInfo; + + + /* just to check if the db has asserted */ + class CmdAssertInfo : public Command { + public: + virtual bool slaveOk() const { + return true; + } + virtual void help( stringstream& help ) const { + help << "check if any asserts have occurred on the server"; + } + virtual LockType locktype() const { return WRITE; } + CmdAssertInfo() : Command("assertInfo",true,"assertinfo") {} + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + result.appendBool("dbasserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet()); + result.appendBool("asserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet() || lastAssert[3].isSet()); + result.append("assert", lastAssert[AssertRegular].toString()); + result.append("assertw", lastAssert[AssertW].toString()); + result.append("assertmsg", lastAssert[AssertMsg].toString()); + result.append("assertuser", lastAssert[AssertUser].toString()); + return true; + } + } cmdAsserts;
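Since CmdBuildInfo is adminOnly, it has to be sent to the admin database; the fields it returns are exactly the ones appended in its run() above. A minimal sketch (connection setup as in the earlier examples):

    BSONObj res;
    conn.runCommand("admin", BSON("buildinfo" << 1), res);
    // res["version"], res["gitVersion"], res["sysInfo"], res["bits"], res["debug"]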
+ class PingCommand : public Command { + public: + PingCommand() : Command( "ping" ){} + virtual bool slaveOk() const { return true; } + virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. responds immediately even if server is in a db lock."; } + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { return false; } + virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + // IMPORTANT: Don't put anything in here that might lock db - including authentication + return true; + } + } pingCmd; + + class FeaturesCmd : public Command { + public: + FeaturesCmd() : Command( "features", true ){} + void help(stringstream& h) const { h << "return on build level feature settings"; } + virtual bool slaveOk() const { return true; } + virtual bool readOnly(){ return true; } + virtual LockType locktype() const { return READ; } + virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + if ( globalScriptEngine ){ + BSONObjBuilder bb( result.subobjStart( "js" ) ); + result.append( "utf8" , globalScriptEngine->utf8Ok() ); + bb.done(); + } + if ( cmdObj["oidReset"].trueValue() ){ + result.append( "oidMachineOld" , OID::staticMachine() ); + OID::newState(); + } + result.append( "oidMachine" , OID::staticMachine() ); + return true; + } + + } featuresCmd; + + class LogRotateCmd : public Command { + public: + LogRotateCmd() : Command( "logRotate" ){} + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + rotateLogs(); + return 1; + } + + } logRotateCmd; + + class ListCommandsCmd : public Command { + public: + virtual void help( stringstream &help ) const { help << "get a list of all db commands"; } + ListCommandsCmd() : Command( "listCommands", false ){} + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return false; } + virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + BSONObjBuilder b( result.subobjStart( "commands" ) ); + for ( map<string,Command*>::iterator i=_commands->begin(); i!=_commands->end(); ++i ){ + Command * c = i->second; + + // don't show oldnames + if (i->first != c->name) + continue; + + BSONObjBuilder temp( b.subobjStart( c->name.c_str() ) ); + + { + stringstream help; + c->help( help ); + temp.append( "help" , help.str() ); + } + temp.append( "lockType" , c->locktype() ); + temp.append( "slaveOk" , c->slaveOk() ); + temp.append( "adminOnly" , c->adminOnly() ); + temp.done(); + } + b.done(); + + return 1; + } + + } listCommandsCmd;
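listCommands reports each registered command once (old names are skipped) with the help/lockType/slaveOk/adminOnly fields built above. A small consumer sketch using only API already shown in this patch:

    BSONObj res;
    conn.runCommand("test", BSON("listCommands" << 1), res);
    BSONObjIterator it(res.getObjectField("commands"));
    while (it.more()) {
        BSONElement c = it.next();
        cout << c.fieldName() << " slaveOk:" << c.Obj()["slaveOk"].trueValue() << endl;
    }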
+ class CmdShutdown : public Command { + public: + virtual bool requiresAuth() { return true; } + virtual bool adminOnly() const { return true; } + virtual bool localHostOnlyIfNoAuth(const BSONObj& cmdObj) { return true; } + virtual bool logTheOp() { + return false; + } + virtual bool slaveOk() const { + return true; + } + virtual LockType locktype() const { return WRITE; } + virtual void help( stringstream& help ) const { + help << "shutdown the database. must be run against admin db and either (1) run from localhost or (2) authenticated.\n"; + } + CmdShutdown() : Command("shutdown") {} + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + Client * c = currentClient.get(); + if ( c ) { + c->shutdown(); + } + log() << "terminating, shutdown command received" << endl; + dbexit( EXIT_CLEAN ); // this never returns + return true; + } + } cmdShutdown; + + /* for testing purposes only */ + class CmdForceError : public Command { + public: + virtual void help( stringstream& help ) const { + help << "for testing purposes only. forces a user assertion exception"; + } + virtual bool logTheOp() { + return false; + } + virtual bool slaveOk() const { + return true; + } + virtual LockType locktype() const { return NONE; } + CmdForceError() : Command("forceerror") {} + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + uassert( 10038 , "forced error", false); + return true; + } + } cmdForceError; + + + +} diff -Nru mongodb-1.4.4/db/db.cpp mongodb-1.6.3/db/db.cpp --- mongodb-1.4.4/db/db.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/db.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,8 +1,7 @@ -// db.cpp : Defines the entry point for the console application. -// +// @file db.cpp : Defines the entry point for the mongod application. /** -* Copyright (C) 2008 10gen Inc. +* Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, @@ -17,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "query.h" #include "introspect.h" @@ -30,48 +29,56 @@ #include "clientcursor.h" #include "pdfile.h" #include "stats/counters.h" -#if !defined(_WIN32) -#include <sys/file.h> -#endif - -#if defined(_WIN32) -#include "../util/ntservice.h" -#endif - +#include "repl/rs.h" #include "../scripting/engine.h" #include "module.h" #include "cmdline.h" #include "stats/snapshots.h" +#include "../util/concurrency/task.h" +#include "../util/version.h" +#include "client.h" +#include "dbwebserver.h" -namespace mongo { - - CmdLine cmdLine; +#if defined(_WIN32) +# include "../util/ntservice.h" +#else +# include <sys/file.h> +#endif - bool useJNI = true; +namespace mongo { /* only off if --nocursors which is for debugging. */ extern bool useCursors; + /* only off if --nohints */ extern bool useHints; - bool noHttpInterface = false; - - extern string bind_ip; extern char *appsrvPath; extern int diagLogging; extern int lenForNewNsFiles; extern int lockFile; - + extern bool checkNsFilesOnLoad; extern string repairpath; +#if defined(_WIN32) + std::wstring windowsServiceName = L"MongoDB"; + std::wstring windowsServiceUser = L""; + std::wstring windowsServicePassword = L""; +#endif + void setupSignals(); - void closeAllSockets(); + void startReplSets(ReplSetCmdline*); void startReplication(); void pairWith(const char *remoteEnd, const char *arb); - void setRecCacheSize(unsigned MB); - void exitCleanly( ExitCode code ); + CmdLine cmdLine; + bool useJNI = true; + bool noHttpInterface = false; + bool shouldRepairDatabases = 0; + bool forceRepair = 0; + Timer startupSrandTimer; + const char *ourgetns() { Client *c = currentClient.get(); if ( ! c )
@@ -88,114 +95,125 @@ QueryResult* emptyMoreResult(long long); - void testTheDb() { - OpDebug debug; - Client::Context ctx("sys.unittest.pdfile"); - - /* this is not validly formatted, if you query this namespace bad things will happen */ - theDataFileMgr.insert("sys.unittest.pdfile", (void *) "hello worldx", 13); - theDataFileMgr.insert("sys.unittest.pdfile", (void *) "hello worldx", 13); - - BSONObj j1((const char *) &js1); - deleteObjects("sys.unittest.delete", j1, false); - theDataFileMgr.insert("sys.unittest.delete", &js1, sizeof(js1)); - deleteObjects("sys.unittest.delete", j1, false); - updateObjects("sys.unittest.delete", j1, j1, true,false,true,debug); - updateObjects("sys.unittest.delete", j1, j1, false,false,true,debug); - - auto_ptr<Cursor> c = theDataFileMgr.findAll("sys.unittest.pdfile"); - while ( c->ok() ) { - c->_current(); - c->advance(); - } - out() << endl; - } - - MessagingPort *connGrab = 0; - void connThread(); + void connThread( MessagingPort * p ); class OurListener : public Listener { public: OurListener(const string &ip, int p) : Listener(ip, p) { } virtual void accepted(MessagingPort *mp) { - assert( connGrab == 0 ); + if ( ! connTicketHolder.tryAcquire() ){ - log() << "connection refused because too many open connections" << endl; + log() << "connection refused because too many open connections: " << connTicketHolder.used() << " of " << connTicketHolder.outof() << endl; // TODO: would be nice if we notified them... mp->shutdown(); + delete mp; return; } - connGrab = mp; + try { - boost::thread thr(connThread); - while ( connGrab ) - sleepmillis(1); + boost::thread thr(boost::bind(&connThread,mp)); } catch ( boost::thread_resource_error& ){ log() << "can't create new thread, closing connection" << endl; mp->shutdown(); - connGrab = 0; + delete mp; } catch ( ... ){ log() << "unknown exception starting connThread" << endl; mp->shutdown(); - connGrab = 0; + delete mp; } } }; - void webServerThread(); +/* todo: make this a real test. the stuff in dbtests/ seem to do all dbdirectclient which exhaust doesn't support yet. */ +// QueryOption_Exhaust +#define TESTEXHAUST 0 +#if( TESTEXHAUST ) + void testExhaust() { + sleepsecs(1); + unsigned n = 0; + auto f = [&n](const BSONObj& o) { + assert( o.valid() ); + //cout << o << endl; + n++; + bool testClosingSocketOnError = false; + if( testClosingSocketOnError ) + assert(false); + }; + DBClientConnection db(false); + db.connect("localhost"); + const char *ns = "local.foo"; + if( db.count(ns) < 10000 ) + for( int i = 0; i < 20000; i++ ) + db.insert(ns, BSON("aaa" << 3 << "b" << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + + try { + db.query(f, ns, Query() ); + } + catch(...) { + cout << "hmmm" << endl; + } + + try { + db.query(f, ns, Query() ); + } + catch(...) 
{ + cout << "caught" << endl; + } + + cout << n << endl; + }; +#endif void listen(int port) { - log() << mongodVersion() << endl; - printGitVersion(); - printSysInfo(); //testTheDb(); log() << "waiting for connections on port " << port << endl; - OurListener l(bind_ip, port); + OurListener l(cmdLine.bind_ip, port); + l.setAsTimeTracker(); startReplication(); if ( !noHttpInterface ) boost::thread thr(webServerThread); - if ( l.init() ) { - ListeningSockets::get()->add( l.socket() ); - l.listen(); - } - } - -} // namespace mongo - -#include "client.h" -namespace mongo { +#if(TESTEXHAUST) + boost::thread thr(testExhaust); +#endif + l.initAndListen(); + } - void sysRuntimeInfo() { - out() << "sysinfo:\n"; + void sysRuntimeInfo() { + out() << "sysinfo:\n"; #if defined(_SC_PAGE_SIZE) - out() << " page size: " << (int) sysconf(_SC_PAGE_SIZE) << endl; + out() << " page size: " << (int) sysconf(_SC_PAGE_SIZE) << endl; #endif #if defined(_SC_PHYS_PAGES) - out() << " _SC_PHYS_PAGES: " << sysconf(_SC_PHYS_PAGES) << endl; + out() << " _SC_PHYS_PAGES: " << sysconf(_SC_PHYS_PAGES) << endl; #endif #if defined(_SC_AVPHYS_PAGES) - out() << " _SC_AVPHYS_PAGES: " << sysconf(_SC_AVPHYS_PAGES) << endl; + out() << " _SC_AVPHYS_PAGES: " << sysconf(_SC_AVPHYS_PAGES) << endl; #endif - } + } + + /* if server is really busy, wait a bit */ + void beNice() { + sleepmicros( Client::recommendedYieldMicros() ); + } /* we create one thread for each connection from an app server database. app server will open a pool of threads. + todo: one day, asio... */ - void connThread() + void connThread( MessagingPort * inPort ) { TicketHolderReleaser connTicketReleaser( &connTicketHolder ); - Client::initThread("conn"); /* todo: move to Client object */ LastError *le = new LastError(); lastError.reset(le); - auto_ptr dbMsgPort( connGrab ); - connGrab = 0; - Client& c = cc(); + inPort->_logLevel = 1; + auto_ptr dbMsgPort( inPort ); + Client& c = Client::initThread("conn", inPort); try { @@ -211,7 +229,7 @@ dbMsgPort->shutdown(); break; } - +sendmore: if ( inShutdown() ) { log() << "got request after shutdown()" << endl; break; @@ -220,8 +238,8 @@ lastError.startRequest( m , le ); DbResponse dbresponse; - if ( !assembleResponse( m, dbresponse, dbMsgPort->farEnd.sa ) ) { - out() << curTimeMillis() % 10000 << " end msg " << dbMsgPort->farEnd.toString() << endl; + if ( !assembleResponse( m, dbresponse, dbMsgPort->farEnd ) ) { + log() << curTimeMillis() % 10000 << " end msg " << dbMsgPort->farEnd.toString() << endl; /* todo: we may not wish to allow this, even on localhost: very low priv accounts could stop us. */ if ( dbMsgPort->farEnd.isLocalHost() ) { dbMsgPort->shutdown(); @@ -230,17 +248,43 @@ dbexit(EXIT_CLEAN); } else { - out() << " (not from localhost, ignoring end msg)" << endl; + log() << " (not from localhost, ignoring end msg)" << endl; } } - if ( dbresponse.response ) + if ( dbresponse.response ) { dbMsgPort->reply(m, *dbresponse.response, dbresponse.responseTo); + if( dbresponse.exhaust ) { + MsgData *header = dbresponse.response->header(); + QueryResult *qr = (QueryResult *) header; + long long cursorid = qr->cursorId; + if( cursorid ) { + assert( dbresponse.exhaust && *dbresponse.exhaust != 0 ); + string ns = dbresponse.exhaust; // before reset() free's it... 
+ m.reset(); + BufBuilder b(512); + b.appendNum((int) 0 /*size set later in appendData()*/); + b.appendNum(header->id); + b.appendNum(header->responseTo); + b.appendNum((int) dbGetMore); + b.appendNum((int) 0); + b.appendStr(ns); + b.appendNum((int) 0); // ntoreturn + b.appendNum(cursorid); + m.appendData(b.buf(), b.len()); + b.decouple(); + DEV log() << "exhaust=true sending more" << endl; + beNice(); + goto sendmore; + } + } + } } } - catch ( AssertionException& ) { - problem() << "AssertionException in connThread, closing client connection" << endl; + catch ( AssertionException& e ) { + log() << "AssertionException in connThread, closing client connection" << endl; + log() << ' ' << e.what() << endl; dbMsgPort->shutdown(); } catch ( SocketException& ) { @@ -266,15 +310,13 @@ globalScriptEngine->threadDone(); } - void msg(const char *m, const char *address, int port, int extras = 0) { - SockAddr db(address, port); -// SockAddr db("127.0.0.1", DBPort); -// SockAddr db("192.168.37.1", MessagingPort::DBPort); -// SockAddr db("10.0.21.60", MessagingPort::DBPort); -// SockAddr db("172.16.0.179", MessagingPort::DBPort); + // SockAddr db("127.0.0.1", DBPort); + // SockAddr db("192.168.37.1", MessagingPort::DBPort); + // SockAddr db("10.0.21.60", MessagingPort::DBPort); + // SockAddr db("172.16.0.179", MessagingPort::DBPort); MessagingPort p; if ( !p.connect(db) ){ @@ -288,7 +330,7 @@ Message response; send.setData( dbMsg , m); - int len = send.data->dataLen(); + int len = send.header()->dataLen(); for ( int i = 0; i < extras; i++ ) p.say(/*db, */send); @@ -297,7 +339,7 @@ bool ok = p.call(send, response); double tm = ((double) t.micros()) + 1; out() << " ****ok. response.data:" << ok << " time:" << tm / 1000.0 << "ms " - << "len: " << len << " data: " << response.data->_data << endl; + << "len: " << len << " data: " << response.singleData()->_data << endl; if ( q+1 < Loops ) { out() << "\t\tSLEEP 8 then sending again as a test" << endl; @@ -313,10 +355,7 @@ msg(m, "127.0.0.1", CmdLine::DefaultDBPort, extras); } - bool shouldRepairDatabases = 0; - bool forceRepair = 0; - - bool doDBUpgrade( const string& dbName , string errmsg , MDFHeader * h ){ + bool doDBUpgrade( const string& dbName , string errmsg , DataFileHeader * h ){ static DBDirectClient db; if ( h->version == 4 && h->versionMinor == 4 ){ @@ -344,13 +383,12 @@ return repairDatabase( dbName.c_str(), errmsg ); } - extern bool checkNsFilesOnLoad; - void repairDatabases() { + // LastError * le = lastError.get( true ); Client::GodScope gs; - log(1) << "enter repairDatabases" << endl; + log(1) << "enter repairDatabases (to check pdfile version #)" << endl; - assert(checkNsFilesOnLoad); + //assert(checkNsFilesOnLoad); checkNsFilesOnLoad = false; // we are mainly just checking the header - don't scan the whole .ns file for every db here. 
dblock lk; @@ -361,7 +399,7 @@ log(1) << "\t" << dbName << endl; Client::Context ctx( dbName ); MongoDataFile *p = cc().database()->getFile( 0 ); - MDFHeader *h = p->getHeader(); + DataFileHeader *h = p->getHeader(); if ( !h->currentVersion() || forceRepair ) { log() << "****" << endl; log() << "****" << endl; @@ -428,10 +466,14 @@ */ class DataFileSync : public BackgroundJob { public: + string name() { return "DataFileSync"; } void run(){ - if ( _sleepsecs > 2100 ) - _sleepsecs = 2100; - log(1) << "will flush memory every: " << _sleepsecs << " seconds" << endl; + if( _sleepsecs == 0 ) + log() << "warning: --syncdelay 0 is not recommended and can have strange performance" << endl; + else if( _sleepsecs == 1 ) + log() << "--syncdelay 1" << endl; + else if( _sleepsecs != 60 ) + log(1) << "--syncdelay " << _sleepsecs << endl; int time_flushing = 0; while ( ! inShutdown() ){ if ( _sleepsecs == 0 ){ @@ -440,61 +482,57 @@ continue; } - sleepmillis( (int)(std::max(0.0, (_sleepsecs * 1000) - time_flushing)) ); - + sleepmillis( (long long) std::max(0.0, (_sleepsecs * 1000) - time_flushing) ); + + if ( inShutdown() ){ + // occasional issue trying to flush during shutdown when sleep interrupted + break; + } + Date_t start = jsTime(); - MemoryMappedFile::flushAll( true ); + int numFiles = MemoryMappedFile::flushAll( true ); time_flushing = (int) (jsTime() - start); globalFlushCounters.flushed(time_flushing); - log(1) << "flushing mmap took " << time_flushing << "ms" << endl; + log(1) << "flushing mmap took " << time_flushing << "ms " << " for " << numFiles << " files" << endl; } } double _sleepsecs; // default value controlled by program options } dataFileSync; - void show_32_warning(){ -#if BOOST_VERSION < 103500 - cout << "\nwarning: built with boost version <= 1.34, limited concurrency" << endl; -#endif - - if ( sizeof(int*) != 4 ) - return; - cout << endl; - cout << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << endl; - cout << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations for more" << endl; - cout << endl; - } + void _initAndListen(int listenPort, const char *appserverLoc = NULL) { - Timer startupSrandTimer; - - void _initAndListen(int listenPort, const char *appserverLoc = null) { + bool is32bit = sizeof(int*) == 4; + { #if !defined(_WIN32) - pid_t pid = 0; - pid = getpid(); + pid_t pid = getpid(); #else - int pid=0; + DWORD pid=GetCurrentProcessId(); #endif - - bool is32bit = sizeof(int*) == 4; - - log() << "Mongo DB : starting : pid = " << pid << " port = " << cmdLine.port << " dbpath = " << dbpath - << " master = " << replSettings.master << " slave = " << (int) replSettings.slave << " " << ( is32bit ? "32" : "64" ) << "-bit " << endl; - DEV log() << " FULL DEBUG ENABLED " << endl; - show_32_warning(); + Nullstream& l = log(); + l << "MongoDB starting : pid=" << pid << " port=" << cmdLine.port << " dbpath=" << dbpath; + if( replSettings.master ) l << " master=" << replSettings.master; + if( replSettings.slave ) l << " slave=" << (int) replSettings.slave; + l << ( is32bit ? 
" 32" : " 64" ) << "-bit " << endl; + } + DEV log() << "_DEBUG build (which is slower)" << endl; + show_warnings(); + log() << mongodVersion() << endl; + printGitVersion(); + printSysInfo(); { stringstream ss; ss << "dbpath (" << dbpath << ") does not exist"; - massert( 10296 , ss.str().c_str(), boost::filesystem::exists( dbpath ) ); + uassert( 10296 , ss.str().c_str(), boost::filesystem::exists( dbpath ) ); } { stringstream ss; ss << "repairpath (" << repairpath << ") does not exist"; - massert( 12590 , ss.str().c_str(), boost::filesystem::exists( repairpath ) ); + uassert( 12590 , ss.str().c_str(), boost::filesystem::exists( repairpath ) ); } acquirePathLock(); @@ -537,24 +575,34 @@ srand((unsigned) (curTimeMicros() ^ startupSrandTimer.micros())); snapshotThread.go(); + clientCursorMonitor.go(); + + if( !cmdLine._replSet.empty() ) { + replSet = true; + ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet); + boost::thread t( boost::bind( &startReplSets, replSetCmdline) ); + } + listen(listenPort); // listen() will return when exit code closes its socket. - while( 1 ) - sleepsecs( 100 ); + exitCleanly(EXIT_NET_ERROR); } - void initAndListen(int listenPort, const char *appserverLoc = null) { + + void testPretouch(); + + void initAndListen(int listenPort, const char *appserverLoc = NULL) { try { _initAndListen(listenPort, appserverLoc); } catch ( std::exception &e ) { - problem() << "exception in initAndListen std::exception: " << e.what() << ", terminating" << endl; + log() << "exception in initAndListen std::exception: " << e.what() << ", terminating" << endl; dbexit( EXIT_UNCAUGHT ); } catch ( int& n ){ - problem() << "exception in initAndListen int: " << n << ", terminating" << endl; + log() << "exception in initAndListen int: " << n << ", terminating" << endl; dbexit( EXIT_UNCAUGHT ); } catch(...) 
{ - log() << " exception in initAndListen, terminating" << endl; + log() << "exception in initAndListen, terminating" << endl; dbexit( EXIT_UNCAUGHT ); } } @@ -569,16 +617,16 @@ } // namespace mongo - using namespace mongo; #include <boost/program_options.hpp> +#undef assert +#define assert MONGO_assert namespace po = boost::program_options; - void show_help_text(po::options_description options) { - show_32_warning(); + show_warnings(); cout << options << endl; }; @@ -605,6 +653,9 @@ getcurns = ourgetns; po::options_description general_options("General options"); + #if defined(_WIN32) + po::options_description windows_scm_options("Windows Service Control Manager options"); + #endif po::options_description replication_options("Replication options"); po::options_description sharding_options("Sharding options"); po::options_description visible_options("Allowed options"); @@ -615,9 +666,7 @@ CmdLine::addGlobalOptions( general_options , hidden_options ); general_options.add_options() - ("bind_ip", po::value<string>(&bind_ip), - "local ip address to bind listener - all local ips bound by default") - ("dbpath", po::value<string>()->default_value("/data/db/"), "directory for datafiles") + ("dbpath", po::value<string>() , "directory for datafiles") ("directoryperdb", "each database will be stored in a separate directory") ("repairpath", po::value<string>() , "root directory for repair files - defaults to dbpath" ) ("cpu", "periodically show cpu and iowait utilization") @@ -640,18 +689,28 @@ ("upgrade", "upgrade db if needed") ("repair", "run repair on all dbs") ("notablescan", "do not allow table scans") - ("syncdelay",po::value<double>(&dataFileSync._sleepsecs)->default_value(60), "seconds between disk syncs (0 for never)") + ("syncdelay",po::value<double>(&dataFileSync._sleepsecs)->default_value(60), "seconds between disk syncs (0=never, but not recommended)") ("profile",po::value<int>(), "0=off 1=slow, 2=all") ("slowms",po::value<int>(&cmdLine.slowMS)->default_value(100), "value of slow for profile and console log" ) ("maxConns",po::value<int>(), "max number of simultaneous connections") -#if defined(_WIN32) + #if !defined(_WIN32) + ("nounixsocket", "disable listening on unix sockets") + #endif + ("ipv6", "enable IPv6 support (disabled by default)") + ; + #if defined(_WIN32) + windows_scm_options.add_options() ("install", "install mongodb service") ("remove", "remove mongodb service") + ("reinstall", "reinstall mongodb service (equivalent of mongod --remove followed by mongod --install)") ("service", "start mongodb service") -#endif - ; + ("serviceName", po::value<string>(), "windows service name") + ("serviceUser", po::value<string>(), "user name service executes as") + ("servicePassword", po::value<string>(), "password used to authenticate serviceUser") + ; + #endif
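Taken together with the general options, the new groups above change how a 1.6 mongod is typically launched. Hedged sample invocations, built only from flags defined in this file (paths and set names are placeholders; --port and --logpath come in via CmdLine::addGlobalOptions, which is not shown in this hunk):

    ./mongod --dbpath /data/db --port 27017 --syncdelay 60 --maxConns 1000
    ./mongod --dbpath /data/db --replSet myset/seedhost1:27017,seedhost2:27017
    mongod.exe --install --logpath C:\mongodb\log.txt --serviceName MongoDB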
- replication_options.add_options() + replication_options.add_options() ("master", "master mode") ("slave", "slave mode") ("source", po::value<string>(), "when slave: specify master as <server:port>") @@ -668,9 +727,12 @@ sharding_options.add_options() ("configsvr", "declare this is a config db of a cluster") ("shardsvr", "declare this is a shard db of a cluster") + ("noMoveParanoia" , "turn off paranoid saving of data for moveChunk. this is on by default for now, but default will switch" ) ; hidden_options.add_options() + ("pretouch", po::value<int>(), "n pretouch threads for applying replicated operations") + ("replSet", po::value<string>(), "specify repl set seed hostnames format <set name>/<host1>,<host2>,etc...") ("command", po::value< vector<string> >(), "command") ("cacheSize", po::value<long>(), "cache size (in MB) for rec store") ; @@ -678,10 +740,14 @@ positional_options.add("command", 3); visible_options.add(general_options); + #if defined(_WIN32) + visible_options.add(windows_scm_options); + #endif visible_options.add(replication_options); visible_options.add(sharding_options); Module::addOptions( visible_options ); + setupCoreSignals(); setupSignals(); dbExecCommand = argv[0]; @@ -698,17 +764,15 @@ } } - DEV out() << "DEV is defined (using _DEBUG), which is slower...\n"; - UnitTest::runTests(); - if (argc == 1) { + if( argc == 1 ) cout << dbExecCommand << " --help for help and startup options" << endl; - } { bool installService = false; bool removeService = false; + bool reinstallService = false; bool startService = false; po::variables_map params; @@ -719,7 +783,6 @@ return 0; } - if ( ! CmdLine::store( argc , argv , visible_options , hidden_options , positional_options , params ) ) return 0; @@ -732,7 +795,11 @@ printGitVersion(); return 0; } - dbpath = params["dbpath"].as<string>(); + if ( params.count( "dbpath" ) ) + dbpath = params["dbpath"].as<string>(); + else + dbpath = "/data/db/"; + if ( params.count("directoryperdb")) { directoryperdb = true; } @@ -809,11 +876,24 @@ cmdLine.notablescan = true; } if (params.count("install")) { + if ( ! params.count( "logpath" ) ){ + cout << "--install has to be used with --logpath" << endl; + ::exit(-1); + } + installService = true; } if (params.count("remove")) { removeService = true; } + if (params.count("reinstall")) { + if ( ! 
params.count( "logpath" ) ){ + cout << "--reinstall has to be used with --logpath" << endl; + ::exit(-1); + } + + reinstallService = true; + } if (params.count("service")) { startService = true; } @@ -836,10 +916,29 @@ /* specifies what the source in local.sources should be */ cmdLine.source = params["source"].as().c_str(); } + if( params.count("pretouch") ) { + cmdLine.pretouch = params["pretouch"].as(); + } + if (params.count("replSet")) { + if (params.count("slavedelay")) { + cout << "--slavedelay cannot be used with --replSet" << endl; + ::exit(-1); + } else if (params.count("only")) { + cout << "--only cannot be used with --replSet" << endl; + ::exit(-1); + } + /* seed list of hosts for the repl set */ + cmdLine._replSet = params["replSet"].as().c_str(); + } if (params.count("only")) { cmdLine.only = params["only"].as().c_str(); } if (params.count("pairwith")) { + cout << "***********************************\n" + << "WARNING WARNING WARNING\n" + << " replica pairs are deprecated\n" + << " see: http://www.mongodb.org/display/DOCS/Replica+Pairs \n" + << "***********************************" << endl; string paired = params["pairwith"].as(); if (params.count("arbiter")) { string arbiter = params["arbiter"].as(); @@ -871,7 +970,8 @@ if (params.count("cacheSize")) { long x = params["cacheSize"].as(); uassert( 10037 , "bad --cacheSize arg", x > 0); - setRecCacheSize(x); + log() << "--cacheSize option not currently supported" << endl; + //setRecCacheSize(x); } if (params.count("port") == 0 ) { if( params.count("configsvr") ) { @@ -880,8 +980,17 @@ if( params.count("shardsvr") ) cmdLine.port = CmdLine::ShardServerPort; } - if ( params.count("configsvr" ) && params.count( "diaglog" ) == 0 ){ - _diaglog.level = 1; + else { + if ( cmdLine.port <= 0 || cmdLine.port > 65535 ){ + out() << "bad --port number" << endl; + dbexit( EXIT_BADOPTIONS ); + } + } + if ( params.count("configsvr" ) ){ + if ( params.count( "diaglog" ) == 0 ) + _diaglog.level = 1; + if ( params.count( "dbpath" ) == 0 ) + dbpath = "/data/configdb"; } if ( params.count( "profile" ) ){ cmdLine.defaultProfile = params["profile"].as(); @@ -892,7 +1001,40 @@ uassert( 12508 , "maxConns can't be greater than 10000000" , newSize < 10000000 ); connTicketHolder.resize( newSize ); } - + if (params.count("nounixsocket")){ + noUnixSocket = true; + } + if (params.count("ipv6")){ + enableIPv6(); + } + if (params.count("noMoveParanoia")){ + cmdLine.moveParanoia = false; + } +#if defined(_WIN32) + if (params.count("serviceName")){ + string x = params["serviceName"].as(); + windowsServiceName = wstring(x.size(),L' '); + for ( size_t i=0; i(); + windowsServiceUser = wstring(x.size(),L' '); + for ( size_t i=0; i(); + windowsServicePassword = wstring(x.size(),L' '); + for ( size_t i=0; i. */ -#include "stdafx.h" +#include "pch.h" #include "query.h" #include "pdfile.h" #include "jsobj.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include #include "introspect.h" #include "btree.h" @@ -108,20 +108,30 @@ class CmdEval : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return false; } + virtual void help( stringstream &help ) const { + help << "Evaluate javascript at the server.\n" "http://www.mongodb.org/display/DOCS/Server-side+Code+Execution"; + } // We need at least read only access to run db.eval - auth for eval'd writes will be checked // as they are requested. 
virtual bool requiresAuth() { return false; } - virtual LockType locktype(){ return WRITE; } - CmdEval() : Command("$eval") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual LockType locktype() const { return NONE; } + CmdEval() : Command("eval", false, "$eval") { } + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + AuthenticationInfo *ai = cc().getAuthenticationInfo(); - uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(cc().database()->name.c_str())); - return dbEval(ns, cmdObj, result, errmsg); + uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) ); + + // write security will be enforced in DBDirectClient + mongolock lk( ai->isAuthorized( dbname.c_str() ) ); + Client::Context ctx( dbname ); + + + return dbEval(dbname.c_str(), cmdObj, result, errmsg); } } cmdeval;
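CmdEval now declares locktype NONE and instead takes a mongolock sized to the caller's write authorization before delegating to dbEval(). Client-side the command keeps its old $eval spelling as an alias. A hedged sketch; the function body is illustrative, the args/retval field names follow the usual db.eval contract rather than this hunk, and BSON_ARRAY is assumed to be available in this driver version:

    BSONObj res;
    conn.runCommand("test", BSON("$eval" << "function(x, y){ return x + y; }"
                                 << "args" << BSON_ARRAY(1 << 2)), res);
    // on success, res["retval"] holds the function's return value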
diff -Nru mongodb-1.4.4/db/db.h mongodb-1.6.3/db/db.h --- mongodb-1.4.4/db/db.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/db.h 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,8 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/message.h" -#include "boost/version.hpp" #include "concurrency.h" #include "pdfile.h" #include "client.h" @@ -64,7 +63,6 @@ DBs::const_iterator it = m.find(db); return it != m.end(); } - Database * get( const string& ns , const string& path ) const { dbMutex.assertAtLeastReadLocked(); @@ -103,7 +101,13 @@ } log(1) << "Accessing: " << dbname << " for the first time" << endl; - db = new Database( dbname.c_str() , justCreated , path ); + try { + db = new Database( dbname.c_str() , justCreated , path ); + } + catch ( ... ){ + m.erase( dbname ); + throw; + } _size++; return db; } @@ -140,6 +144,12 @@ private: string _todb( const string& ns ) const { + string d = __todb( ns ); + uassert( 13280 , (string)"invalid db name: " + ns , Database::validDBName( d ) ); + return d; + } + + string __todb( const string& ns ) const { size_t i = ns.find( '.' ); if ( i == string::npos ){ uassert( 13074 , "db name can't be empty" , ns.size() ); @@ -190,7 +200,7 @@ if ( _context ) _context->relocked(); } }; - + /** only does a temp release if we're not nested and have a lock @@ -213,10 +223,11 @@ } } + bool unlocked(){ + return real > 0; + } }; - extern TicketHolder connTicketHolder; - } // namespace mongo //#include "dbinfo.h" diff -Nru mongodb-1.4.4/db/dbhelpers.cpp mongodb-1.6.3/db/dbhelpers.cpp --- mongodb-1.4.4/db/dbhelpers.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/dbhelpers.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "dbhelpers.h" #include "query.h" @@ -24,10 +24,11 @@ #include "queryoptimizer.h" #include "btree.h" #include "pdfile.h" +#include "oplog.h" namespace mongo { - CursorIterator::CursorIterator( auto_ptr<Cursor> c , BSONObj filter ) + CursorIterator::CursorIterator( shared_ptr<Cursor> c , BSONObj filter ) : _cursor( c ){ if ( ! filter.isEmpty() ) _matcher.reset( new CoveredIndexMatcher( filter , BSONObj() ) ); @@ -93,51 +94,65 @@ class FindOne : public QueryOp { public: FindOne( bool requireIndex ) : requireIndex_( requireIndex ) {} - virtual void init() { + virtual void _init() { if ( requireIndex_ && strcmp( qp().indexKey().firstElement().fieldName(), "$natural" ) == 0 ) throw MsgAssertionException( 9011 , "Not an index cursor" ); c_ = qp().newCursor(); - if ( !c_->ok() ) + if ( !c_->ok() ) { setComplete(); - else - matcher_.reset( new CoveredIndexMatcher( qp().query(), qp().indexKey() ) ); + } } virtual void next() { if ( !c_->ok() ) { setComplete(); return; } - if ( matcher_->matches( c_->currKey(), c_->currLoc() ) ) { + if ( matcher()->matches( c_->currKey(), c_->currLoc() ) ) { one_ = c_->current(); - setComplete(); + loc_ = c_->currLoc(); + setStop(); } else { c_->advance(); } } virtual bool mayRecordPlan() const { return false; } - virtual QueryOp *clone() const { return new FindOne( requireIndex_ ); } + virtual QueryOp *_createChild() const { return new FindOne( requireIndex_ ); } BSONObj one() const { return one_; } + DiskLoc loc() const { return loc_; } private: bool requireIndex_; - auto_ptr< Cursor > c_; - auto_ptr< CoveredIndexMatcher > matcher_; + shared_ptr<Cursor> c_; BSONObj one_; + DiskLoc loc_; }; /* fetch a single object from collection ns that matches query set your db SavedContext first */ - bool Helpers::findOne(const char *ns, BSONObj query, BSONObj& result, bool requireIndex) { - QueryPlanSet s( ns, query, BSONObj(), 0, !requireIndex ); + bool Helpers::findOne(const char *ns, const BSONObj &query, BSONObj& result, bool requireIndex) { + MultiPlanScanner s( ns, query, BSONObj(), 0, !requireIndex ); FindOne original( requireIndex ); shared_ptr< FindOne > res = s.runOp( original ); - massert( 10302 , res->exceptionMessage(), res->complete() ); + if ( ! res->complete() ) + throw MsgAssertionException( res->exception() ); if ( res->one().isEmpty() ) return false; result = res->one(); return true; } + /* fetch a single object from collection ns that matches query + set your db SavedContext first + */ + DiskLoc Helpers::findOne(const char *ns, const BSONObj &query, bool requireIndex) { + MultiPlanScanner s( ns, query, BSONObj(), 0, !requireIndex ); + FindOne original( requireIndex ); + shared_ptr< FindOne > res = s.runOp( original ); + if ( ! res->complete() ) + throw MsgAssertionException( res->exception() ); + return res->loc(); + } + auto_ptr<CursorIterator> Helpers::find( const char *ns , BSONObj query , bool requireIndex ){ uassert( 10047 , "requireIndex not supported in Helpers::find yet" , ! requireIndex ); auto_ptr<CursorIterator> i; @@ -145,9 +160,9 @@ return i; } - bool Helpers::findById(Client& c, const char *ns, BSONObj query, BSONObj& result , bool * nsFound , bool * indexFound ){ + dbMutex.assertAtLeastReadLocked(); Database *database = c.database(); assert( database ); NamespaceDetails *d = database->namespaceIndex.details(ns); @@ -173,6 +188,20 @@ return true; } + DiskLoc Helpers::findById(NamespaceDetails *d, BSONObj idquery) { + int idxNo = d->findIdIndex(); + uassert(13430, "no _id index", idxNo>=0); + IndexDetails& i = d->idx( idxNo ); + BSONObj key = i.getKeyFromQuery( idquery ); + return i.head.btree()->findSingle( i , i.head , key ); + } + + bool Helpers::isEmpty(const char *ns) { + Client::Context context(ns); + shared_ptr<Cursor> c = DataFileMgr::findAll(ns); + return !c->ok(); + } + /* Get the first object from a collection. 
Generally only useful if the collection only ever has a single object -- which is a "singleton collection. @@ -181,7 +210,7 @@ bool Helpers::getSingleton(const char *ns, BSONObj& result) { Client::Context context(ns); - auto_ptr<Cursor> c = DataFileMgr::findAll(ns); + shared_ptr<Cursor> c = DataFileMgr::findAll(ns); if ( !c->ok() ) return false; @@ -189,10 +218,92 @@ return true; } + bool Helpers::getLast(const char *ns, BSONObj& result) { + Client::Context ctx(ns); + shared_ptr<Cursor> c = findTableScan(ns, reverseNaturalObj); + if( !c->ok() ) + return false; + result = c->current(); + return true; + } + + void Helpers::upsert( const string& ns , const BSONObj& o ){ + BSONElement e = o["_id"]; + assert( e.type() ); + BSONObj id = e.wrap(); + + OpDebug debug; + Client::Context context(ns); + updateObjects(ns.c_str(), o, /*pattern=*/id, /*upsert=*/true, /*multi=*/false , /*logtheop=*/true , debug ); + } + void Helpers::putSingleton(const char *ns, BSONObj obj) { OpDebug debug; Client::Context context(ns); - updateObjects(ns, obj, /*pattern=*/BSONObj(), /*upsert=*/true, /*multi=*/false , true , debug ); + updateObjects(ns, obj, /*pattern=*/BSONObj(), /*upsert=*/true, /*multi=*/false , /*logtheop=*/true , debug ); + } + + void Helpers::putSingletonGod(const char *ns, BSONObj obj, bool logTheOp) { + OpDebug debug; + Client::Context context(ns); + _updateObjects(/*god=*/true, ns, obj, /*pattern=*/BSONObj(), /*upsert=*/true, /*multi=*/false , logTheOp , debug ); + } + + BSONObj Helpers::toKeyFormat( const BSONObj& o , BSONObj& key ){ + BSONObjBuilder me; + BSONObjBuilder k; + + BSONObjIterator i( o ); + while ( i.more() ){ + BSONElement e = i.next(); + k.append( e.fieldName() , 1 ); + me.appendAs( e , "" ); + } + key = k.obj(); + return me.obj(); + } + + long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ){ + BSONObj keya , keyb; + BSONObj minClean = toKeyFormat( min , keya ); + BSONObj maxClean = toKeyFormat( max , keyb ); + assert( keya == keyb ); + + Client::Context ctx(ns); + NamespaceDetails* nsd = nsdetails( ns.c_str() ); + if ( ! nsd ) + return 0; + + int ii = nsd->findIndexByKeyPattern( keya ); + assert( ii >= 0 ); + + long long num = 0; + + IndexDetails& i = nsd->idx( ii ); + + shared_ptr<Cursor> c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); + auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); + cc->setDoingDeletes( true ); + + while ( c->ok() ){ + DiskLoc rloc = c->currLoc(); + BSONObj key = c->currKey(); + + if ( callback ) + callback->goingToDelete( c->current() ); + + c->advance(); + c->noteLocation(); + + logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() ); + theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); + num++; + + c->checkLocation(); + + } + + return num; }
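removeRange drives a BtreeCursor over the [min, max) key range of a matching index, logs a "d" oplog entry per document, and can yield through its ClientCursor. A server-side usage sketch; the namespace, key bounds, and surrounding lock are illustrative, and removeRange assumes an index on the key pattern already exists:

    writelock lk("test.users");
    long long n = Helpers::removeRange("test.users",
                                       BSON("uid" << 0),      // min, inclusive
                                       BSON("uid" << 1000),   // max, exclusive unless maxInclusive
                                       /*yield=*/true);
    log() << "removeRange deleted " << n << " documents" << endl;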
<< NUM++ << ".bson"; + _file /= ss.str(); + + } + + RemoveSaver::~RemoveSaver(){ + if ( _out ){ + _out->close(); + delete _out; + _out = 0; + } + } + + void RemoveSaver::goingToDelete( const BSONObj& o ){ + if ( ! _out ){ + create_directories( _root ); + _out = new ofstream(); + _out->open( _file.string().c_str() , ios_base::out | ios_base::binary ); + if ( ! _out->good() ){ + log( LL_WARNING ) << "couldn't create file: " << _file.string() << " for remove saving" << endl; + delete _out; + _out = 0; + return; + } + + } + _out->write( o.objdata() , o.objsize() ); + } + } // namespace mongo diff -Nru mongodb-1.4.4/db/dbhelpers.h mongodb-1.6.3/db/dbhelpers.h --- mongodb-1.4.4/db/dbhelpers.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/dbhelpers.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,8 @@ -// dbhelpers.h +/* @file dbhelpers.h + + db helpers are helper functions and classes that let us easily manipulate the local + database instance in-proc. +*/ /** * Copyright (C) 2008 10gen Inc. @@ -16,31 +20,29 @@ * along with this program. If not, see . */ -/* db helpers are helper functions and classes that let us easily manipulate the local - database instance. -*/ - #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "client.h" #include "db.h" namespace mongo { + const BSONObj reverseNaturalObj = BSON( "$natural" << -1 ); + class Cursor; class CoveredIndexMatcher; class CursorIterator { public: - CursorIterator( auto_ptr c , BSONObj filter = BSONObj() ); + CursorIterator( shared_ptr c , BSONObj filter = BSONObj() ); BSONObj next(); bool hasNext(); private: void _advance(); - auto_ptr _cursor; + shared_ptr _cursor; auto_ptr _matcher; BSONObj _o; }; @@ -66,13 +68,16 @@ /* fetch a single object from collection ns that matches query. set your db SavedContext first. + @param query - the query to perform. note this is the low level portion of query so "orderby : ..." + won't work. + @param requireIndex if true, complain if no index for the query. a way to guard against writing a slow query. @return true if object found */ - static bool findOne(const char *ns, BSONObj query, BSONObj& result, bool requireIndex = false); - + static bool findOne(const char *ns, const BSONObj &query, BSONObj& result, bool requireIndex = false); + static DiskLoc findOne(const char *ns, const BSONObj &query, bool requireIndex); /** * @param foundIndex if passed in will be set to 1 if ns and index found @@ -81,18 +86,47 @@ static bool findById(Client&, const char *ns, BSONObj query, BSONObj& result , bool * nsFound = 0 , bool * indexFound = 0 ); + /* uasserts if no _id index. + @return null loc if not found */ + static DiskLoc findById(NamespaceDetails *d, BSONObj query); + static auto_ptr find( const char *ns , BSONObj query = BSONObj() , bool requireIndex = false ); - /* Get/put the first object from a collection. Generally only useful if the collection - only ever has a single object -- which is a "singleton collection". + /** Get/put the first (or last) object from a collection. Generally only useful if the collection + only ever has a single object -- which is a "singleton collection". + + You do not need to set the database (Context) before calling. - You do not need to set the database before calling. - - Returns: true if object exists. + @return true if object exists. 
*/
        static bool getSingleton(const char *ns, BSONObj& result);
        static void putSingleton(const char *ns, BSONObj obj);
+        static void putSingletonGod(const char *ns, BSONObj obj, bool logTheOp);
+        static bool getFirst(const char *ns, BSONObj& result) { return getSingleton(ns, result); }
+        static bool getLast(const char *ns, BSONObj& result); // get last object in the collection; e.g. {$natural : -1}
+
+        /**
+         * you have to lock
+         * you do not have to have Context set
+         * o has to have an _id field or will assert
+         */
+        static void upsert( const string& ns , const BSONObj& o );
+
+        /** You do not need to set the database before calling.
+            @return true if collection is empty.
+        */
+        static bool isEmpty(const char *ns);
+
+        // TODO: this should be somewhere else probably
+        static BSONObj toKeyFormat( const BSONObj& o , BSONObj& key );
+
+        class RemoveCallback {
+        public:
+            virtual ~RemoveCallback(){}
+            virtual void goingToDelete( const BSONObj& o ) = 0;
+        };
+        /* removeRange: operation is oplog'd */
+        static long long removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield = false , bool maxInclusive = false , RemoveCallback * callback = 0 );
 
        /* Remove all objects from a collection.
           You do not need to set the database before calling.
@@ -118,5 +152,24 @@
        string name_;
        BSONObj key_;
    };
+
+
+    /**
+     * used for saving deleted bson objects to a flat file
+     */
+    class RemoveSaver : public Helpers::RemoveCallback , boost::noncopyable {
+    public:
+        RemoveSaver( const string& type , const string& ns , const string& why);
+        ~RemoveSaver();
+
+        void goingToDelete( const BSONObj& o );
+
+    private:
+        path _root;
+        path _file;
+        ofstream* _out;
+
+    };
+
 } // namespace mongo
diff -Nru mongodb-1.4.4/db/dbmessage.h mongodb-1.6.3/db/dbmessage.h
--- mongodb-1.4.4/db/dbmessage.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/dbmessage.h	2010-09-24 10:02:42.000000000 -0700
@@ -20,6 +20,7 @@
 #include "jsobj.h"
 #include "namespace.h"
 #include "../util/message.h"
+#include "../client/constants.h"
 
 namespace mongo {
 
@@ -37,24 +38,6 @@
 
#pragma pack(1)
     struct QueryResult : public MsgData {
-        enum ResultFlagType {
-            /* returned, with zero results, when getMore is called but the cursor id
-               is not valid at the server. */
-            ResultFlag_CursorNotFound = 1,
-
-            /* { $err : ... } is being returned */
-            ResultFlag_ErrSet = 2,
-
-            /* Have to update config from the server, usually $err is also set */
-            ResultFlag_ShardConfigStale = 4,
-
-            /* for backward compatability: this let's us know the server supports
-               the QueryOption_AwaitData option. if it doesn't, a repl slave client should sleep
-               a little between getMore's.
-            */
-            ResultFlag_AwaitCapable = 8
-        };
-
         long long cursorId;
         int startingFrom;
         int nReturned;
@@ -68,41 +51,57 @@
             return dataAsInt();
         }
         void setResultFlagsToOk() {
-            _resultFlags() = 0; // ResultFlag_AwaitCapable
+            _resultFlags() = ResultFlag_AwaitCapable;
         }
     };
#pragma pack()
 
    /* For the database/server protocol, these objects and functions encapsulate
       the various messages transmitted over the connection.
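
       [editor's note -- not part of the patch: the OP_QUERY body that DbMessage (below) walks is,
       after the standard MsgData header:

           int32    reserved/flags        -> DbMessage::reservedField()
           cstring  fullCollectionName    -> DbMessage::getns()
           int32    numberToSkip          -> DbMessage::getInt(0)
           int32    numberToReturn        -> DbMessage::getInt(1) == getQueryNToReturn()
           bson     query document        -> DbMessage::nextJsObj()
           bson     returnFieldSelector   -> optional second nextJsObj()

       so afterNS() == data + strlen(data) + 1 lands on numberToSkip, which is what getInt()
       indexes from. A minimal consumer, mirroring the QueryMessage constructor further down:

           DbMessage d(m);
           const char *ns  = d.getns();
           int ntoskip     = d.pullInt();
           int ntoreturn   = d.pullInt();
           BSONObj query   = d.nextJsObj();
           BSONObj fields  = d.moreJSObjs() ? d.nextJsObj() : BSONObj();]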
- */ + See http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol + */ class DbMessage { public: - DbMessage(const Message& _m) : m(_m) { - theEnd = _m.data->_data + _m.data->dataLen(); - int *r = (int *) _m.data->_data; - reserved = *r; - r++; - data = (const char *) r; + DbMessage(const Message& _m) : m(_m) + { + // for received messages, Message has only one buffer + theEnd = _m.singleData()->_data + _m.header()->dataLen(); + char *r = _m.singleData()->_data; + reserved = (int *) r; + data = r + 4; nextjsobj = data; } - const char * getns() { + /** the 32 bit field before the ns */ + int& reservedField() { return *reserved; } + + const char * getns() const { return data; } - void getns(Namespace& ns) { + void getns(Namespace& ns) const { ns = data; } + + const char * afterNS() const { + return data + strlen( data ) + 1; + } - - void resetPull(){ - nextjsobj = data; + int getInt( int num ) const { + const int * foo = (const int*)afterNS(); + return foo[num]; + } + + int getQueryNToReturn() const { + return getInt( 1 ); } - int pullInt() { + + void resetPull(){ nextjsobj = data; } + int pullInt() const { return pullInt(); } + int& pullInt() { if ( nextjsobj == data ) nextjsobj += strlen(data) + 1; // skip namespace - int i = *((int *)nextjsobj); + int& i = *((int *)nextjsobj); nextjsobj += 4; return i; } @@ -117,7 +116,7 @@ return i; } - OID* getOID() { + OID* getOID() const { return (OID *) (data + strlen(data) + 1); // skip namespace } @@ -129,7 +128,7 @@ } /* for insert and update msgs */ - bool moreJSObjs() { + bool moreJSObjs() const { return nextjsobj != 0; } BSONObj nextJsObj() { @@ -137,13 +136,13 @@ nextjsobj += strlen(data) + 1; // skip namespace massert( 13066 , "Message contains no documents", theEnd > nextjsobj ); } - massert( 10304 , "Remaining data too small for BSON object", theEnd - nextjsobj > 3 ); + massert( 10304 , "Client Error: Remaining data too small for BSON object", theEnd - nextjsobj > 3 ); BSONObj js(nextjsobj); - massert( 10305 , "Invalid object size", js.objsize() > 3 ); - massert( 10306 , "Next object larger than available space", + massert( 10305 , "Client Error: Invalid object size", js.objsize() > 3 ); + massert( 10306 , "Client Error: Next object larger than space left in message", js.objsize() < ( theEnd - data ) ); if ( objcheck && !js.valid() ) { - massert( 10307 , "bad object in message", false); + massert( 10307 , "Client Error: bad object in message", false); } nextjsobj += js.objsize(); if ( nextjsobj >= theEnd ) @@ -151,9 +150,7 @@ return js; } - const Message& msg() { - return m; - } + const Message& msg() const { return m; } void markSet(){ mark = nextjsobj; @@ -165,7 +162,7 @@ private: const Message& m; - int reserved; + int* reserved; const char *data; const char *nextjsobj; const char *theEnd; @@ -193,7 +190,7 @@ if ( d.moreJSObjs() ) { fields = d.nextJsObj(); } - queryOptions = d.msg().data->dataAsInt(); + queryOptions = d.msg().header()->dataAsInt(); } }; @@ -211,7 +208,7 @@ ) { BufBuilder b(32768); b.skip(sizeof(QueryResult)); - b.append(data, size); + b.appendBuf(data, size); QueryResult *qr = (QueryResult *) b.buf(); qr->_resultFlags() = queryResultFlags; qr->len = b.len(); @@ -221,12 +218,13 @@ qr->nReturned = nReturned; b.decouple(); Message resp(qr, true); - p->reply(requestMsg, resp, requestMsg.data->id); + p->reply(requestMsg, resp, requestMsg.header()->id); } } // namespace mongo //#include "bsonobj.h" + #include "instance.h" namespace mongo { @@ -245,7 +243,7 @@ inline void replyToQuery(int queryResultFlags, Message &m, 
DbResponse &dbresponse, BSONObj obj) { BufBuilder b; b.skip(sizeof(QueryResult)); - b.append((void*) obj.objdata(), obj.objsize()); + b.appendBuf((void*) obj.objdata(), obj.objsize()); QueryResult* msgdata = (QueryResult *) b.buf(); b.decouple(); QueryResult *qr = msgdata; @@ -258,7 +256,9 @@ Message *resp = new Message(); resp->setData(msgdata, true); // transport will free dbresponse.response = resp; - dbresponse.responseTo = m.data->id; + dbresponse.responseTo = m.header()->id; } + string debugString( Message& m ); + } // namespace mongo diff -Nru mongodb-1.4.4/db/db.rc mongodb-1.6.3/db/db.rc --- mongodb-1.4.4/db/db.rc 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/db.rc 2010-09-24 10:02:42.000000000 -0700 @@ -1,61 +1,12 @@ -// Microsoft Visual C++ generated resource script. -// -#include "resource.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. -// -// #include "afxres.h" - -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// English (U.S.) resources - -#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -LANGUAGE 9, 1 -#pragma code_page(1252) - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "resource.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#include ""afxres.h""\r\n" - "\0" -END - -3 TEXTINCLUDE -BEGIN - "\r\n" - "\0" -END - -#endif // APSTUDIO_INVOKED - -#endif // English (U.S.) resources -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. -// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED - +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +///////////////////////////////////////////////////////////////////////////// +// +// Icon +// +// Icon with lowest ID value placed first to ensure application icon +// remains consistent on all systems. 
+IDI_ICON2 ICON "mongo.ico" +///////////////////////////////////////////////////////////////////////////// \ No newline at end of file diff -Nru mongodb-1.4.4/db/db.sln mongodb-1.6.3/db/db.sln --- mongodb-1.4.4/db/db.sln 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/db.sln 2010-09-24 10:02:42.000000000 -0700 @@ -4,51 +4,83 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongod", "db.vcproj", "{215B2D68-0A70-4D10-8E75-B31010C62A91}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{4082881B-EB00-486F-906C-843B8EC06E18}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "dbtests", "dbtests", "{C72EBEDD-342D-4371-8B0D-D7505902FA69}" ProjectSection(SolutionItems) = preProject - ..\dbtests\btreetests.cpp = ..\dbtests\btreetests.cpp + driverHelpers.cpp = driverHelpers.cpp EndProjectSection EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shell", "shell", "{2CABB3B8-C9A6-478D-9463-0B37799ED708}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{2B262D59-9DC7-4BF1-A431-1BD4966899A5}" ProjectSection(SolutionItems) = preProject ..\tools\bridge.cpp = ..\tools\bridge.cpp + ..\tools\dump.cpp = ..\tools\dump.cpp + ..\tools\export.cpp = ..\tools\export.cpp + ..\tools\files.cpp = ..\tools\files.cpp + ..\tools\import.cpp = ..\tools\import.cpp + ..\tools\restore.cpp = ..\tools\restore.cpp ..\tools\sniffer.cpp = ..\tools\sniffer.cpp + ..\tools\stat.cpp = ..\tools\stat.cpp + ..\tools\tool.cpp = ..\tools\tool.cpp + ..\tools\tool.h = ..\tools\tool.h EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongos", "..\s\dbgrid.vcproj", "{E03717ED-69B4-4D21-BC55-DF6690B585C6}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "..\dbtests\test.vcproj", "{215B2D68-0A70-4D10-8E75-B33010C62A91}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "unix files", "unix files", "{2F760952-C71B-4865-998F-AABAE96D1373}" + ProjectSection(SolutionItems) = preProject + ..\util\mmap_posix.cpp = ..\util\mmap_posix.cpp + ..\util\processinfo_darwin.cpp = ..\util\processinfo_darwin.cpp + ..\util\processinfo_linux2.cpp = ..\util\processinfo_linux2.cpp + ..\util\processinfo_none.cpp = ..\util\processinfo_none.cpp + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shell", "shell", "{407B4B88-3451-433C-B74F-31B31FEB5791}" + ProjectSection(SolutionItems) = preProject + ..\shell\dbshell.cpp = ..\shell\dbshell.cpp + ..\shell\mongo_vstudio.cpp = ..\shell\mongo_vstudio.cpp + ..\shell\utils.cpp = ..\shell\utils.cpp + ..\shell\utils.h = ..\shell\utils.h + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "other source files", "other source files", "{12B11474-2D74-48C3-BB3D-F03249BEA88F}" + ProjectSection(SolutionItems) = preProject + ..\buildscripts\buildboost.bat = ..\buildscripts\buildboost.bat + ..\buildscripts\buildboost64.bat = ..\buildscripts\buildboost64.bat + ..\SConstruct = ..\SConstruct + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bsondemo", "..\bson\bsondemo\bsondemo.vcproj", "{C9DB5EB7-81AA-4185-BAA1-DA035654402F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug Recstore|Win32 = Debug Recstore|Win32 Debug|Win32 = Debug|Win32 Release|Win32 = Release|Win32 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug Recstore|Win32.ActiveCfg = 
Debug Recstore|Win32
-		{215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32
 		{215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32
 		{215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.Build.0 = Debug|Win32
 		{215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.ActiveCfg = Release|Win32
 		{215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.Build.0 = Release|Win32
-		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug Recstore|Win32.ActiveCfg = Debug Recstore|Win32
-		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32
 		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.ActiveCfg = Debug|Win32
 		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.Build.0 = Debug|Win32
 		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.ActiveCfg = Release|Win32
 		{E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.Build.0 = Release|Win32
-		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug Recstore|Win32.ActiveCfg = Debug Recstore|Win32
-		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug Recstore|Win32.Build.0 = Debug Recstore|Win32
 		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32
 		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.Build.0 = Debug|Win32
 		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.ActiveCfg = Release|Win32
 		{215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.Build.0 = Release|Win32
+		{C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.Build.0 = Debug|Win32
+		{C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.ActiveCfg = Release|Win32
+		{C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
+	GlobalSection(NestedProjects) = preSolution
+		{2B262D59-9DC7-4BF1-A431-1BD4966899A5} = {12B11474-2D74-48C3-BB3D-F03249BEA88F}
+		{2F760952-C71B-4865-998F-AABAE96D1373} = {12B11474-2D74-48C3-BB3D-F03249BEA88F}
+		{407B4B88-3451-433C-B74F-31B31FEB5791} = {12B11474-2D74-48C3-BB3D-F03249BEA88F}
+		{4082881B-EB00-486F-906C-843B8EC06E18} = {12B11474-2D74-48C3-BB3D-F03249BEA88F}
+	EndGlobalSection
EndGlobal
diff -Nru mongodb-1.4.4/db/db.vcproj mongodb-1.6.3/db/db.vcproj
--- mongodb-1.4.4/db/db.vcproj	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/db.vcproj	2010-09-24 10:02:42.000000000 -0700
@@ -1,1891 +1,1885 @@
[editor's placeholder: this hunk replaces the entire generated VS2008 project file; its XML was
 stripped to bare +/- markers during extraction and nothing further is recoverable.]
diff -Nru mongodb-1.4.4/db/db.vcxproj mongodb-1.6.3/db/db.vcxproj
--- mongodb-1.4.4/db/db.vcxproj	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/db.vcxproj	2010-09-24 10:02:42.000000000 -0700
[editor's placeholder: the XML tags of this VS2010 project were stripped during extraction. The
 recoverable fragments show the "Debug Recstore|Win32" configuration being dropped; Debug/Release
 x64 configurations added; the precompiled header switched from stdafx.h to pch.h; _UNICODE/UNICODE,
 SUPPORT_UCP, SUPPORT_UTF8 and MONGO_EXPOSE_MACROS added to the preprocessor definitions; the boost
 include/library paths moved from "c:\Program Files\boost\boost_1_41_0" to c:\boost with
 vs2010_32/vs2010_64 lib directories; Psapi.lib added to the linker inputs; and long runs of
 added/removed source-file items that are not recoverable here.]
diff -Nru mongodb-1.4.4/db/db.vcxproj.filters mongodb-1.6.3/db/db.vcxproj.filters
--- mongodb-1.4.4/db/db.vcxproj.filters	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/db.vcxproj.filters	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,928 @@
[editor's placeholder: new file whose XML was stripped during extraction. It assigns the project's
 sources to solution filters -- replSets (and replSets\testing), db\core, db\btree,
 db\storage engine, db\geo, db\modules, repl_old, client, sharding, scripting, stats, bson,
 util\core, util\concurrency, util\pcre, util\mongoutils, libs and Resource Files -- and defines
 the filter GUIDs.]
\ No newline at end of file
diff -Nru mongodb-1.4.4/db/dbwebserver.cpp mongodb-1.6.3/db/dbwebserver.cpp
--- mongodb-1.4.4/db/dbwebserver.cpp	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/dbwebserver.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -19,45 +19,31 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
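
 [editor's note -- not part of the patch: the rewritten allowed() in this file checks standard
 RFC 2617 HTTP digest authentication. With HA1 taken from the stored "pwd" field of the admin
 user and HA2 = MD5("GET:" + uri), the expected response it compares against is

     MD5( HA1 ":" nonce [ ":" nc ":" cnonce ":" qop ] ":" HA2 )

 which is exactly what the stringstream r accumulates before md5simpledigest(r.str()).]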
*/
 
-#include "stdafx.h"
+#include "pch.h"
 #include "../util/miniwebserver.h"
+#include "../util/mongoutils/html.h"
 #include "../util/md5.hpp"
 #include "db.h"
-#include "repl.h"
-#include "replset.h"
 #include "instance.h"
 #include "security.h"
 #include "stats/snapshots.h"
 #include "background.h"
 #include "commands.h"
-
+#include "../util/version.h"
+#include "../util/ramlog.h"
 #include <pcrecpp.h>
+#include "dbwebserver.h"
 #include <boost/date_time/posix_time/posix_time.hpp>
 
 #undef assert
-#define assert xassert
+#define assert MONGO_assert
 
 namespace mongo {
 
-    extern string bind_ip;
-    extern const char *replInfo;
-
-    bool getInitialSyncCompleted();
+    using namespace mongoutils::html;
+    using namespace bson;
 
     time_t started = time(0);
 
-    /*
-        string toString() {
-            stringstream ss;
-            unsigned long long dt = last - start;
-            ss << dt/1000;
-            ss << '\t';
-            ss << timeLocked/1000 << '\t';
-            if( dt )
-                ss << (timeLocked*100)/dt << '%';
-            return ss.str();
-        }
-    */
 
     struct Timing {
         Timing() {
             start = timeLocked = 0;
@@ -65,187 +51,39 @@
         }
         unsigned long long start, timeLocked;
     };
 
-    bool _bold;
-    string bold(bool x) {
-        _bold = x;
-        return x ? "<b>" : "";
-    }
-    string bold() {
-        return _bold ? "</b>" : "";
-    }
+    bool execCommand( Command * c ,
+                      Client& client , int queryOptions ,
+                      const char *ns, BSONObj& cmdObj ,
+                      BSONObjBuilder& result,
+                      bool fromRepl );
 
     class DbWebServer : public MiniWebServer {
     public:
-        // caller locks
[editor's placeholder: doLockedStuff() -- the 1.4.4 locked-state status renderer (database count,
 cursor stats, replication/replpair state, a DBTOP usage table and lock-info output, all emitted
 as raw HTML) -- is deleted here; its HTML string literals were stripped to fragments during
 extraction, so the removed lines are summarized rather than reproduced.]
+        DbWebServer(const string& ip, int port) : MiniWebServer(ip, port) {
+            WebStatusPlugin::initAll();
+        }
 
[editor's placeholder: the display() and tablecell() helpers used by that page are deleted as well.]
+    private:
         void doUnlockedStuff(stringstream& ss) {
             /* this is in the header already ss << "port:      " << port << '\n'; */
[editor's placeholder: the old body -- version and assertion dump, replInfo, and a per-client
 operations table emitted as raw HTML -- is replaced by the short <pre> block below plus the
 pluggable WebStatusPlugin sections.]
+            ss << "<pre>";
+            ss << mongodVersion() << '\n';
+            ss << "git hash: " << gitVersion() << '\n';
+            ss << "sys info: " << sysInfo() << '\n';
+            ss << "uptime: " << time(0)-started << " seconds\n";
+            ss << "
"; } + + private: - bool allowed( const char * rq , vector& headers, const SockAddr &from ){ - - if ( from.localhost() ) + bool allowed( const char * rq , vector& headers, const SockAddr &from ) { + if ( from.isLocalHost() ) return true; - - Client::GodScope gs; - if ( db.findOne( "admin.system.users" , BSONObj() , 0 , QueryOption_SlaveOk ).isEmpty() ) + if ( ! webHaveAdminUsers() ) return true; - + string auth = getHeader( rq , "Authorization" ); if ( auth.size() > 0 && auth.find( "Digest " ) == 0 ){ @@ -260,25 +98,26 @@ parms[name] = val; } - BSONObj user = db.findOne( "admin.system.users" , BSON( "user" << parms["username"] ) ); + BSONObj user = webGetAdminUser( parms["username"] ); if ( ! user.isEmpty() ){ string ha1 = user["pwd"].str(); string ha2 = md5simpledigest( (string)"GET" + ":" + parms["uri"] ); - string r = ha1 + ":" + parms["nonce"]; + stringstream r; + r << ha1 << ':' << parms["nonce"]; if ( parms["nc"].size() && parms["cnonce"].size() && parms["qop"].size() ){ - r += ":"; - r += parms["nc"]; - r += ":"; - r += parms["cnonce"]; - r += ":"; - r += parms["qop"]; + r << ':'; + r << parms["nc"]; + r << ':'; + r << parms["cnonce"]; + r << ':'; + r << parms["qop"]; } - r += ":"; - r += ha2; - r = md5simpledigest( r ); + r << ':'; + r << ha2; + string r1 = md5simpledigest( r.str() ); - if ( r == parms["response"] ) + if ( r1 == parms["response"] ) return true; } @@ -307,85 +146,239 @@ const SockAddr &from ) { - //out() << "url [" << url << "]" << endl; - if ( url.size() > 1 ) { - if ( url.find( "/_status" ) == 0 ){ - if ( ! allowed( rq , headers, from ) ){ - responseCode = 401; - responseMsg = "not allowed\n"; - return; - } - headers.push_back( "Content-Type: application/json" ); - generateServerStatus( url , responseMsg ); - responseCode = 200; + if ( ! allowed( rq , headers, from ) ) { + responseCode = 401; + headers.push_back( "Content-Type: text/plain" ); + responseMsg = "not allowed\n"; return; + } + + { + DbWebHandler * handler = DbWebHandler::findHandler( url ); + if ( handler ){ + if ( handler->requiresREST( url ) && ! cmdLine.rest ) + _rejectREST( responseMsg , responseCode , headers ); + else + handler->handle( rq , url , responseMsg , responseCode , headers , from ); + return; + } } - if ( ! cmdLine.rest ){ - responseCode = 403; - responseMsg = "rest is not enabled. use --rest to turn on"; + + if ( ! cmdLine.rest ) { + _rejectREST( responseMsg , responseCode , headers ); return; } - if ( ! allowed( rq , headers, from ) ){ - responseCode = 401; - responseMsg = "not allowed\n"; - return; - } - handleRESTRequest( rq , url , responseMsg , responseCode , headers ); + + responseCode = 404; + headers.push_back( "Content-Type: text/html" ); + responseMsg = "unknown url\n"; return; } + + // generate home page + if ( ! allowed( rq , headers, from ) ){ + responseCode = 401; + responseMsg = "not allowed\n"; + return; + } responseCode = 200; stringstream ss; - ss << ""; - string dbname; { stringstream z; - z << "mongodb " << getHostName() << ':' << mongo::cmdLine.port << ' '; + z << "mongod " << prettyHostName(); dbname = z.str(); } - ss << dbname << "

" << dbname << "

\n

";
-
-            doUnlockedStuff(ss);
+            ss << start(dbname) << h2(dbname);
+            ss << "

List all commands | \n"; + ss << "Replica set status

\n"; + //ss << "_status"; { - Timer t; - readlocktry lk( "" , 2000 ); - if ( lk.got() ){ - ss << "time to get dblock: " << t.millis() << "ms\n"; - doLockedStuff(ss); - } - else { - ss << "\ntimed out getting dblock\n"; + const map *m = Command::webCommands(); + if( m ) { + ss << a("", "These read-only context-less commands can be executed from the web interface. Results are json format, unless ?text is appended in which case the result is output as text for easier human viewing", "Commands") << ": "; + for( map::const_iterator i = m->begin(); i != m->end(); i++ ) { + stringstream h; + i->second->help(h); + string help = h.str(); + ss << "first << "?text\""; + if( help != "no help defined" ) + ss << " title=\"" << help << '"'; + ss << ">" << i->first << " "; + } + ss << '\n'; } } - + ss << '\n'; + /* + ss << "HTTP admin port:" << _port << "

\n"; + */ + + doUnlockedStuff(ss); - ss << "

"; + WebStatusPlugin::runAll( ss ); + + ss << "\n"; responseMsg = ss.str(); - // we want to return SavedContext from before the authentication was performed - if ( ! allowed( rq , headers, from ) ){ - responseCode = 401; - responseMsg = "not allowed\n"; - return; - } + + } + + void _rejectREST( string& responseMsg , int& responseCode, vector& headers ){ + responseCode = 403; + stringstream ss; + ss << "REST is not enabled. use --rest to turn on.\n"; + ss << "check that port " << _port << " is secured for the network too.\n"; + responseMsg = ss.str(); + headers.push_back( "Content-Type: text/plain" ); + } + + }; + // --- + + bool prisort( const Prioritizable * a , const Prioritizable * b ){ + return a->priority() < b->priority(); + } + + // -- status framework --- + WebStatusPlugin::WebStatusPlugin( const string& secionName , double priority , const string& subheader ) + : Prioritizable(priority), _name( secionName ) , _subHeading( subheader ) { + if ( ! _plugins ) + _plugins = new vector(); + _plugins->push_back( this ); + } + + void WebStatusPlugin::initAll(){ + if ( ! _plugins ) + return; + + sort( _plugins->begin(), _plugins->end() , prisort ); + + for ( unsigned i=0; i<_plugins->size(); i++ ) + (*_plugins)[i]->init(); + } + + void WebStatusPlugin::runAll( stringstream& ss ){ + if ( ! _plugins ) + return; + + for ( unsigned i=0; i<_plugins->size(); i++ ){ + WebStatusPlugin * p = (*_plugins)[i]; + ss << "
\n" + << "" << p->_name << ""; + + ss << " " << p->_subHeading; + + ss << "
\n"; + + p->run(ss); } - void generateServerStatus( string url , string& responseMsg ){ + } + + vector * WebStatusPlugin::_plugins = 0; + + // -- basic statuc plugins -- + + class LogPlugin : public WebStatusPlugin { + public: + LogPlugin() : WebStatusPlugin( "Log" , 100 ), _log(0){ + } + + virtual void init(){ + assert( ! _log ); + _log = new RamLog(); + Logstream::get().addGlobalTee( _log ); + } + + virtual void run( stringstream& ss ){ + _log->toHTML( ss ); + } + RamLog * _log; + }; + + LogPlugin * logPlugin = new LogPlugin(); + + // -- handler framework --- + + DbWebHandler::DbWebHandler( const string& name , double priority , bool requiresREST ) + : Prioritizable(priority), _name(name) , _requiresREST(requiresREST){ + + { // setup strings + _defaultUrl = "/"; + _defaultUrl += name; + + stringstream ss; + ss << name << " priority: " << priority << " rest: " << requiresREST; + _toString = ss.str(); + } + + { // add to handler list + if ( ! _handlers ) + _handlers = new vector(); + _handlers->push_back( this ); + sort( _handlers->begin() , _handlers->end() , prisort ); + } + } + + DbWebHandler * DbWebHandler::findHandler( const string& url ){ + if ( ! _handlers ) + return 0; + + for ( unsigned i=0; i<_handlers->size(); i++ ){ + DbWebHandler * h = (*_handlers)[i]; + if ( h->handles( url ) ) + return h; + } + + return 0; + } + + vector * DbWebHandler::_handlers = 0; + + // --- basic handlers --- + + class FavIconHandler : public DbWebHandler { + public: + FavIconHandler() : DbWebHandler( "favicon.ico" , 0 , false ){} + + virtual void handle( const char *rq, string url, + string& responseMsg, int& responseCode, + vector& headers, const SockAddr &from ){ + responseCode = 404; + headers.push_back( "Content-Type: text/plain" ); + responseMsg = "no favicon\n"; + } + + } faviconHandler; + + class StatusHandler : public DbWebHandler { + public: + StatusHandler() : DbWebHandler( "_status" , 1 , false ){} + + virtual void handle( const char *rq, string url, + string& responseMsg, int& responseCode, + vector& headers, const SockAddr &from ){ + headers.push_back( "Content-Type: application/json" ); + responseCode = 200; + static vector commands; if ( commands.size() == 0 ){ commands.push_back( "serverStatus" ); commands.push_back( "buildinfo" ); } - + BSONObj params; if ( url.find( "?" ) != string::npos ) { - parseParams( params , url.substr( url.find( "?" ) + 1 ) ); + MiniWebServer::parseParams( params , url.substr( url.find( "?" ) + 1 ) ); } BSONObjBuilder buf(1024); @@ -400,7 +393,7 @@ BSONObj co; { BSONObjBuilder b; - b.append( cmd.c_str() , 1 ); + b.append( cmd , 1 ); if ( cmd == "serverStatus" && params["repl"].type() ){ b.append( "repl" , atoi( params["repl"].valuestr() ) ); @@ -413,188 +406,143 @@ BSONObjBuilder sub; if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) ) - buf.append( cmd.c_str() , errmsg ); + buf.append( cmd , errmsg ); else - buf.append( cmd.c_str() , sub.obj() ); + buf.append( cmd , sub.obj() ); } responseMsg = buf.obj().jsonString(); - } - void handleRESTRequest( const char *rq, // the full request - string url, - string& responseMsg, - int& responseCode, - vector& headers // if completely empty, content-type: text/html will be added - ) { - - string::size_type first = url.find( "/" , 1 ); - if ( first == string::npos ) { - responseCode = 400; - return; - } + } - string method = parseMethod( rq ); - string dbname = url.substr( 1 , first - 1 ); - string coll = url.substr( first + 1 ); - string action = ""; + } statusHandler; - BSONObj params; - if ( coll.find( "?" 
) != string::npos ) { - parseParams( params , coll.substr( coll.find( "?" ) + 1 ) ); - coll = coll.substr( 0 , coll.find( "?" ) ); - } + class CommandListHandler : public DbWebHandler { + public: + CommandListHandler() : DbWebHandler( "_commands" , 1 , true ){} + + virtual void handle( const char *rq, string url, + string& responseMsg, int& responseCode, + vector& headers, const SockAddr &from ){ + headers.push_back( "Content-Type: text/html" ); + responseCode = 200; + + stringstream ss; + ss << start("Commands List"); + ss << p( a("/", "back", "Home") ); + ss << p( "MongoDB List of Commands\n" ); + const map *m = Command::commandsByBestName(); + ss << "S:slave-ok R:read-lock W:write-lock A:admin-only
\n"; + ss << table(); + ss << "CommandAttributesHelp\n"; + for( map::const_iterator i = m->begin(); i != m->end(); i++ ) + i->second->htmlHelp(ss); + ss << _table() << _end(); + + responseMsg = ss.str(); + } + } commandListHandler; - string::size_type last = coll.find_last_of( "/" ); - if ( last == string::npos ) { - action = coll; - coll = "_defaultCollection"; - } - else { - action = coll.substr( last + 1 ); - coll = coll.substr( 0 , last ); + class CommandsHandler : public DbWebHandler { + public: + CommandsHandler() : DbWebHandler( "DUMMY COMMANDS" , 2 , true ){} + + bool _cmd( const string& url , string& cmd , bool& text ) const { + const char * x = url.c_str(); + + if ( x[0] != '/' ){ + // this should never happen + return false; } + + if ( strchr( x + 1 , '/' ) ) + return false; + + x++; - for ( string::size_type i=0; i *m = Command::webCommands(); + if( ! m ) + return 0; + + map::const_iterator i = m->find(cmd); + if ( i == m->end() ) + return 0; + + return i->second; + } - // TODO: this is how i guess if something is a number. pretty lame right now - double number = strtod( val , &temp ); - if ( temp != val ) - queryBuilder.append( field , number ); - else - queryBuilder.append( field , val ); - } + virtual bool handles( const string& url ) const { + string cmd; + bool text; + if ( ! _cmd( url , cmd , text ) ) + return false; - BSONObj query = queryBuilder.obj(); + return _cmd( cmd ); + } + + virtual void handle( const char *rq, string url, + string& responseMsg, int& responseCode, + vector& headers, const SockAddr &from ){ + + string cmd; + bool text = false; + assert( _cmd( url , cmd , text ) ); + Command * c = _cmd( cmd ); + assert( c ); - auto_ptr cursor = db.query( ns.c_str() , query, num , skip ); - uassert( 13085 , "query failed for dbwebserver" , cursor.get() ); - if ( one ) { - if ( cursor->more() ) { - BSONObj obj = cursor->next(); - out << obj.jsonString() << "\n"; - } - else { - responseCode = 404; - } - return; + BSONObj cmdObj = BSON( cmd << 1 ); + Client& client = cc(); + + BSONObjBuilder result; + execCommand(c, client, 0, "admin.", cmdObj , result, false); + + responseCode = 200; + + string j = result.done().jsonString(JS, text ); + responseMsg = j; + + if( text ){ + headers.push_back( "Content-Type: text/plain" ); + responseMsg += '\n'; } - - out << "{\n"; - out << " \"offset\" : " << skip << ",\n"; - out << " \"rows\": [\n"; - - int howMany = 0; - while ( cursor->more() ) { - if ( howMany++ ) - out << " ,\n"; - BSONObj obj = cursor->next(); - out << " " << obj.jsonString(); - - } - out << "\n ],\n\n"; - - out << " \"total_rows\" : " << howMany << " ,\n"; - out << " \"query\" : " << query.jsonString() << " ,\n"; - out << " \"millis\" : " << t.millis() << "\n"; - out << "}\n"; - } - - // TODO Generate id and revision per couch POST spec - void handlePost( string ns, const char *body, BSONObj& params, int & responseCode, stringstream & out ) { - try { - BSONObj obj = fromjson( body ); - db.insert( ns.c_str(), obj ); - } catch ( ... ) { - responseCode = 400; // Bad Request. Seems reasonable for now. 
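
[editor's note -- not part of the patch: _cmd(), continued below, accepts only single-segment urls
 that name a web command, with an optional ?text suffix selecting plain-text output, e.g.

     "/serverStatus"       -> cmd = "serverStatus", text = false
     "/serverStatus?text"  -> cmd = "serverStatus", text = true
     "/db/collection/cmd"  -> rejected: a '/' past the first means it is not a command url]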
- out << "{ \"ok\" : false }"; - return; + else { + headers.push_back( "Content-Type: application/json" ); } - responseCode = 201; - out << "{ \"ok\" : true }"; - } - - int _getOption( BSONElement e , int def ) { - if ( e.isNumber() ) - return e.numberInt(); - if ( e.type() == String ) - return atoi( e.valuestr() ); - return def; } + + } commandsHandler; - private: - static DBDirectClient db; - }; + // --- external ---- - DBDirectClient DbWebServer::db; + string prettyHostName() { + stringstream s; + s << getHostName(); + if( mongo::cmdLine.port != CmdLine::DefaultDBPort ) + s << ':' << mongo::cmdLine.port; + return s.str(); + } void webServerThread() { Client::initThread("websvr"); - DbWebServer mini; - int p = cmdLine.port + 1000; - if ( mini.init(bind_ip, p) ) { - ListeningSockets::get()->add( mini.socket() ); - log() << "web admin interface listening on port " << p << endl; - mini.run(); - } - else { - log() << "warning: web admin interface failed to initialize on port " << p << endl; - } + const int p = cmdLine.port + 1000; + DbWebServer mini(cmdLine.bind_ip, p); + log() << "web admin interface listening on port " << p << endl; + mini.initAndListen(); cc().shutdown(); } diff -Nru mongodb-1.4.4/db/dbwebserver.h mongodb-1.6.3/db/dbwebserver.h --- mongodb-1.4.4/db/dbwebserver.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/dbwebserver.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,90 @@ +/** @file dbwebserver.h + */ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
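
 [editor's note -- not part of the patch: a sketch of how the hooks declared below are meant to be
 used, following the LogPlugin pattern in dbwebserver.cpp above -- a static instance registers
 itself through the base-class constructor and is rendered by WebStatusPlugin::runAll() in
 priority order. The section name, priority and output here are made up:

     class UptimePlugin : public WebStatusPlugin {
     public:
         UptimePlugin() : WebStatusPlugin( "uptime" , 50 , "seconds since start" ){}
         virtual void init(){}
         virtual void run( stringstream& ss ){
             ss << time(0) - started << " seconds\n"; // 'started' is the global in dbwebserver.cpp
         }
     } uptimePlugin;]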
+*/
+
+namespace mongo {
+
+    class Prioritizable {
+    public:
+        Prioritizable( double p ) : _priority(p){}
+        double priority() const { return _priority; }
+    private:
+        double _priority;
+    };
+
+    class DbWebHandler : public Prioritizable {
+    public:
+        DbWebHandler( const string& name , double priority , bool requiresREST );
+        virtual ~DbWebHandler(){}
+
+        virtual bool handles( const string& url ) const { return url == _defaultUrl; }
+
+        virtual bool requiresREST( const string& url ) const { return _requiresREST; }
+
+        virtual void handle( const char *rq, // the full request
+                             string url,
+                             // set these and return them:
+                             string& responseMsg,
+                             int& responseCode,
+                             vector<string>& headers, // if completely empty, content-type: text/html will be added
+                             const SockAddr &from
+                             ) = 0;
+
+        string toString() const { return _toString; }
+        static DbWebHandler * findHandler( const string& url );
+
+    private:
+        string _name;
+        bool _requiresREST;
+
+        string _defaultUrl;
+        string _toString;
+
+        static vector<DbWebHandler*> * _handlers;
+    };
+
+    class WebStatusPlugin : public Prioritizable {
+    public:
+        WebStatusPlugin( const string& secionName , double priority , const string& subheader = "" );
+        virtual ~WebStatusPlugin(){}
+
+        virtual void run( stringstream& ss ) = 0;
+        /** called when web server starts up */
+        virtual void init() = 0;
+
+        static void initAll();
+        static void runAll( stringstream& ss );
+    private:
+        string _name;
+        string _subHeading;
+        static vector<WebStatusPlugin*> * _plugins;
+
+    };
+
+    void webServerThread();
+    string prettyHostName();
+
+    /** @return if there are any admin users. this should not block for long and throw if can't get a lock if needed */
+    bool webHaveAdminUsers();
+
+    /** @return admin user with this name. this should not block for long and throw if can't get a lock if needed */
+    BSONObj webGetAdminUser( const string& username );
+
+};
+
+
diff -Nru mongodb-1.4.4/db/diskloc.h mongodb-1.6.3/db/diskloc.h
--- mongodb-1.4.4/db/diskloc.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/diskloc.h	2010-09-24 10:02:42.000000000 -0700
@@ -22,17 +22,18 @@
 
#pragma once
 
+#include "jsobj.h"
+
 namespace mongo {
 
-#pragma pack(1)
-
     class Record;
     class DeletedRecord;
     class Extent;
     class BtreeBucket;
-    class BSONObj;
     class MongoDataFile;
 
+#pragma pack(1)
     class DiskLoc {
         int fileNo; /* this will be volume, file #, etc.
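
           [editor's note -- not part of the patch: a DiskLoc is the packed (fileNo, ofs) pair that
           addresses a record on disk: fileNo picks the datafile (<dbname>.0, <dbname>.1, ...) and
           ofs is the byte offset into it. E.g. fileNo 3, ofs 0x1a2b addresses the record 6699
           bytes into datafile 3; toString() prints it as "3:1a2b" and the new toBSONObj() as
           { file: 3, offset: 6699 }.]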
*/ int ofs; @@ -85,7 +86,10 @@ ss << hex << fileNo << ':' << ofs; return ss.str(); } - operator string() const { return toString(); } + + BSONObj toBSONObj() const { + return BSON( "file" << fileNo << "offset" << ofs ); + } int& GETOFS() { return ofs; @@ -146,7 +150,6 @@ MongoDataFile& pdf() const; }; - #pragma pack() const DiskLoc minDiskLoc(0, 1); diff -Nru mongodb-1.4.4/db/driverHelpers.cpp mongodb-1.6.3/db/driverHelpers.cpp --- mongodb-1.4.4/db/driverHelpers.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/driverHelpers.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -21,7 +21,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "jsobj.h" #include "pdfile.h" #include "namespace.h" @@ -38,16 +38,15 @@ public: BasicDriverHelper( const char * name ) : Command( name ){} - virtual LockType locktype(){ return NONE; } - virtual bool slaveOk(){ return true; } - virtual bool slaveOverrideOk(){ return true; } - + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool slaveOverrideOk(){ return true; } }; class ObjectIdTest : public BasicDriverHelper { public: ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ){} - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ if ( cmdObj.firstElement().type() != jstOID ){ errmsg = "not oid"; return false; diff -Nru mongodb-1.4.4/db/extsort.cpp mongodb-1.6.3/db/extsort.cpp --- mongodb-1.4.4/db/extsort.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/extsort.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "extsort.h" #include "namespace.h" @@ -126,7 +126,7 @@ ofstream out; out.open( file.c_str() , ios_base::out | ios_base::binary ); - ASSERT_STREAM_GOOD( 10051 , (string)"couldn't open file: " + file , out ); + assertStreamGood( 10051 , (string)"couldn't open file: " + file , out ); int num = 0; for ( InMemory::iterator i=_cur->begin(); i != _cur->end(); ++i ){ @@ -221,7 +221,7 @@ long length; _buf = (char*)_file.map( file.c_str() , length , MemoryMappedFile::SEQUENTIAL ); massert( 10308 , "mmap failed" , _buf ); - assert( (unsigned long)length == file_size( file ) ); + assert( (unsigned long long)length == (unsigned long long)file_size( file ) ); _end = _buf + length; } BSONObjExternalSorter::FileIterator::~FileIterator(){ diff -Nru mongodb-1.4.4/db/extsort.h mongodb-1.6.3/db/extsort.h --- mongodb-1.4.4/db/extsort.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/extsort.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "namespace.h" #include "curop.h" diff -Nru mongodb-1.4.4/db/flushtest.cpp mongodb-1.6.3/db/flushtest.cpp --- mongodb-1.4.4/db/flushtest.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/flushtest.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -14,7 +14,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include #include "../util/goodies.h" #include diff -Nru mongodb-1.4.4/db/geo/2d.cpp mongodb-1.6.3/db/geo/2d.cpp --- mongodb-1.4.4/db/geo/2d.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/geo/2d.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,1589 @@ +// geo2d.cpp + +/** +* Copyright (C) 2008 10gen Inc. 
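
 [editor's note -- not part of the patch: Geo2dType below quantizes each coordinate before the two
 axes are bit-interleaved into a GeoHash. With the defaults in this file (min -180, max 180,
 bits 26):

     _scaling    = 2^32 / (max - min)              // (1024*1024*1024*4.0)/360
     _convert(x) = (unsigned)((x - min) * _scaling)

 so e.g. x = 0.0 maps to (unsigned)(180 * 11930464.7...) = 0x80000000, the midpoint of the
 unsigned range, and only the top 26 bits of each axis survive in the hash -- roughly one-foot
 cells for lat/long, per the comment on the "bits" option.]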
+* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../namespace.h" +#include "../jsobj.h" +#include "../index.h" +#include "../../util/unittest.h" +#include "../commands.h" +#include "../pdfile.h" +#include "../btree.h" +#include "../curop.h" +#include "../matcher.h" + +#include "core.h" + +namespace mongo { + +#if 0 +# define GEODEBUG(x) cout << x << endl; + inline void PREFIXDEBUG(GeoHash prefix, const GeoConvert* g){ + if (!prefix.constrains()) { + cout << "\t empty prefix" << endl; + return ; + } + + Point ll (g, prefix); // lower left + prefix.move(1,1); + Point tr (g, prefix); // top right + + Point center ( (ll._x+tr._x)/2, (ll._y+tr._y)/2 ); + double radius = fabs(ll._x - tr._x) / 2; + + cout << "\t ll: " << ll.toString() << " tr: " << tr.toString() + << " center: " << center.toString() << " radius: " << radius << endl; + + } +#else +# define GEODEBUG(x) +# define PREFIXDEBUG(x, y) +#endif + + double EARTH_RADIUS_KM = 6371; + double EARTH_RADIUS_MILES = EARTH_RADIUS_KM * 0.621371192; + + + GeoBitSets geoBitSets; + + const string GEO2DNAME = "2d"; + + class Geo2dType : public IndexType , public GeoConvert { + public: + Geo2dType( const IndexPlugin * plugin , const IndexSpec* spec ) + : IndexType( plugin , spec ){ + + BSONObjBuilder orderBuilder; + + BSONObjIterator i( spec->keyPattern ); + while ( i.more() ){ + BSONElement e = i.next(); + if ( e.type() == String && GEO2DNAME == e.valuestr() ){ + uassert( 13022 , "can't have 2 geo field" , _geo.size() == 0 ); + uassert( 13023 , "2d has to be first in index" , _other.size() == 0 ); + _geo = e.fieldName(); + } + else { + _other.push_back( e.fieldName() ); + } + orderBuilder.append( "" , 1 ); + } + + uassert( 13024 , "no geo field specified" , _geo.size() ); + + _bits = _configval( spec , "bits" , 26 ); // for lat/long, ~ 1ft + + uassert( 13028 , "can't have more than 32 bits in geo index" , _bits <= 32 ); + + _max = _configval( spec , "max" , 180 ); + _min = _configval( spec , "min" , -180 ); + + _scaling = (1024*1024*1024*4.0)/(_max-_min); + + _order = orderBuilder.obj(); + + GeoHash a(0, 0, _bits); + GeoHash b = a; + b.move(1, 1); + _error = distance(a, b); + } + + int _configval( const IndexSpec* spec , const string& name , int def ){ + BSONElement e = spec->info[name]; + if ( e.isNumber() ) + return e.numberInt(); + return def; + } + + ~Geo2dType(){ + + } + + virtual BSONObj fixKey( const BSONObj& in ) { + if ( in.firstElement().type() == BinData ) + return in; + + BSONObjBuilder b(in.objsize()+16); + + if ( in.firstElement().isABSONObj() ) + _hash( in.firstElement().embeddedObject() ).append( b , "" ); + else if ( in.firstElement().type() == String ) + GeoHash( in.firstElement().valuestr() ).append( b , "" ); + else if ( in.firstElement().type() == RegEx ) + GeoHash( in.firstElement().regex() ).append( b , "" ); + else + return in; + + BSONObjIterator i(in); + i.next(); + while ( i.more() ) + b.append( i.next() ); + return b.obj(); + } + + virtual void getKeys( const BSONObj &obj, 
BSONObjSetDefaultOrder &keys ) const { + BSONElement geo = obj.getFieldDotted(_geo.c_str()); + if ( geo.eoo() ) + return; + + BSONObjBuilder b(64); + + if ( ! geo.isABSONObj() ) + return; + + BSONObj embed = geo.embeddedObject(); + if ( embed.isEmpty() ) + return; + + _hash( embed ).append( b , "" ); + + for ( size_t i=0; i<_other.size(); i++ ){ + BSONElement e = obj[_other[i]]; + if ( e.eoo() ) + e = _spec->missingField(); + b.appendAs( e , "" ); + } + keys.insert( b.obj() ); + } + + GeoHash _tohash( const BSONElement& e ) const { + if ( e.isABSONObj() ) + return _hash( e.embeddedObject() ); + + return GeoHash( e , _bits ); + } + + GeoHash _hash( const BSONObj& o ) const { + BSONObjIterator i(o); + uassert( 13067 , "geo field is empty" , i.more() ); + BSONElement x = i.next(); + uassert( 13068 , "geo field only has 1 element" , i.more() ); + BSONElement y = i.next(); + + uassert( 13026 , "geo values have to be numbers: " + o.toString() , x.isNumber() && y.isNumber() ); + + return hash( x.number() , y.number() ); + } + + GeoHash hash( double x , double y ) const { + return GeoHash( _convert(x), _convert(y) , _bits ); + } + + BSONObj _unhash( const GeoHash& h ) const { + unsigned x , y; + h.unhash( x , y ); + BSONObjBuilder b; + b.append( "x" , _unconvert( x ) ); + b.append( "y" , _unconvert( y ) ); + return b.obj(); + } + + unsigned _convert( double in ) const { + uassert( 13027 , "point not in range" , in <= (_max + _error) && in >= (_min - _error) ); + in -= _min; + assert( in > 0 ); + return (unsigned)(in * _scaling); + } + + double _unconvert( unsigned in ) const { + double x = in; + x /= _scaling; + x += _min; + return x; + } + + void unhash( const GeoHash& h , double& x , double& y ) const { + unsigned a,b; + h.unhash(a,b); + x = _unconvert( a ); + y = _unconvert( b ); + } + + double distance( const GeoHash& a , const GeoHash& b ) const { + double ax,ay,bx,by; + unhash( a , ax , ay ); + unhash( b , bx , by ); + + double dx = bx - ax; + double dy = by - ay; + + return sqrt( ( dx * dx ) + ( dy * dy ) ); + } + + double sizeDiag( const GeoHash& a ) const { + GeoHash b = a; + b.move( 1 , 1 ); + return distance( a , b ); + } + + double sizeEdge( const GeoHash& a ) const { + double ax,ay,bx,by; + GeoHash b = a; + b.move( 1 , 1 ); + unhash( a, ax, ay ); + unhash( b, bx, by ); + return (fabs(ax-bx)); + } + + const IndexDetails* getDetails() const { + return _spec->getDetails(); + } + + virtual shared_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const; + + virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const { + BSONElement e = query.getFieldDotted(_geo.c_str()); + switch ( e.type() ){ + case Object: { + BSONObj sub = e.embeddedObject(); + switch ( sub.firstElement().getGtLtOp() ){ + case BSONObj::opNEAR: + case BSONObj::opWITHIN: + return OPTIMAL; + default:; + } + } + case Array: + return HELPFUL; + default: + return USELESS; + } + } + + string _geo; + vector _other; + + unsigned _bits; + int _max; + int _min; + double _scaling; + + BSONObj _order; + double _error; + }; + + class Box { + public: + + Box( const Geo2dType * g , const GeoHash& hash ) + : _min( g , hash ) , + _max( _min._x + g->sizeEdge( hash ) , _min._y + g->sizeEdge( hash ) ){ + } + + Box( double x , double y , double size ) + : _min( x , y ) , + _max( x + size , y + size ){ + } + + Box( Point min , Point max ) + : _min( min ) , _max( max ){ + } + + Box(){} + + string toString() const { + StringBuilder buf(64); + buf << _min.toString() << " -->> " << 
_max.toString(); + return buf.str(); + } + + bool between( double min , double max , double val , double fudge=0) const { + return val + fudge >= min && val <= max + fudge; + } + + bool mid( double amin , double amax , double bmin , double bmax , bool min , double& res ) const { + assert( amin <= amax ); + assert( bmin <= bmax ); + + if ( amin < bmin ){ + if ( amax < bmin ) + return false; + res = min ? bmin : amax; + return true; + } + if ( amin > bmax ) + return false; + res = min ? amin : bmax; + return true; + } + + double intersects( const Box& other ) const { + + Point boundMin(0,0); + Point boundMax(0,0); + + if ( mid( _min._x , _max._x , other._min._x , other._max._x , true , boundMin._x ) == false || + mid( _min._x , _max._x , other._min._x , other._max._x , false , boundMax._x ) == false || + mid( _min._y , _max._y , other._min._y , other._max._y , true , boundMin._y ) == false || + mid( _min._y , _max._y , other._min._y , other._max._y , false , boundMax._y ) == false ) + return 0; + + Box intersection( boundMin , boundMax ); + + return intersection.area() / ( ( area() + other.area() ) / 2 ); + } + + double area() const { + return ( _max._x - _min._x ) * ( _max._y - _min._y ); + } + + Point center() const { + return Point( ( _min._x + _max._x ) / 2 , + ( _min._y + _max._y ) / 2 ); + } + + bool inside( Point p , double fudge = 0 ){ + bool res = inside( p._x , p._y , fudge ); + //cout << "is : " << p.toString() << " in " << toString() << " = " << res << endl; + return res; + } + + bool inside( double x , double y , double fudge = 0 ){ + return + between( _min._x , _max._x , x , fudge ) && + between( _min._y , _max._y , y , fudge ); + } + + Point _min; + Point _max; + }; + + class Geo2dPlugin : public IndexPlugin { + public: + Geo2dPlugin() : IndexPlugin( GEO2DNAME ){ + } + + virtual IndexType* generate( const IndexSpec* spec ) const { + return new Geo2dType( this , spec ); + } + } geo2dplugin; + + struct GeoUnitTest : public UnitTest { + + int round( double d ){ + return (int)(.5+(d*1000)); + } + +#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } + + void run(){ + assert( ! GeoHash::isBitSet( 0 , 0 ) ); + assert( ! 
GeoHash::isBitSet( 0 , 31 ) ); + assert( GeoHash::isBitSet( 1 , 31 ) ); + + IndexSpec i( BSON( "loc" << "2d" ) ); + Geo2dType g( &geo2dplugin , &i ); + { + double x = 73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + double x = -73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + GeoHash h( "0000" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0001" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0000" ); + + h.init( "0001" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0100" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0001" ); + + + h.init( "0000" ); + h.move( 1 , 0 ); + GEOHEQ( h , "0010" ); + } + + { + Box b( 5 , 5 , 2 ); + assert( "(5,5) -->> (7,7)" == b.toString() ); + } + + { + GeoHash a = g.hash( 1 , 1 ); + GeoHash b = g.hash( 4 , 5 ); + assert( 5 == (int)(g.distance( a , b ) ) ); + a = g.hash( 50 , 50 ); + b = g.hash( 42 , 44 ); + assert( round(10) == round(g.distance( a , b )) ); + } + + { + GeoHash x("0000"); + assert( 0 == x.getHash() ); + x.init( 0 , 1 , 32 ); + GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) + + assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); + assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); + } + + { + GeoHash x("1010"); + GEOHEQ( x , "1010" ); + GeoHash y = x + "01"; + GEOHEQ( y , "101001" ); + } + + { + + GeoHash a = g.hash( 5 , 5 ); + GeoHash b = g.hash( 5 , 7 ); + GeoHash c = g.hash( 100 , 100 ); + /* + cout << "a: " << a << endl; + cout << "b: " << b << endl; + cout << "c: " << c << endl; + + cout << "a: " << a.toStringHex1() << endl; + cout << "b: " << b.toStringHex1() << endl; + cout << "c: " << c.toStringHex1() << endl; + */ + BSONObj oa = a.wrap(); + BSONObj ob = b.wrap(); + BSONObj oc = c.wrap(); + /* + cout << "a: " << oa.hexDump() << endl; + cout << "b: " << ob.hexDump() << endl; + cout << "c: " << oc.hexDump() << endl; + */ + assert( oa.woCompare( ob ) < 0 ); + assert( oa.woCompare( oc ) < 0 ); + + } + + { + GeoHash x( "000000" ); + x.move( -1 , 0 ); + GEOHEQ( x , "101010" ); + x.move( 1 , -1 ); + GEOHEQ( x , "010101" ); + x.move( 0 , 1 ); + GEOHEQ( x , "000000" ); + } + + { + GeoHash prefix( "110011000000" ); + GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); + assert( ! entry.hasPrefix( prefix ) ); + + entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000"); + assert( entry.toString().find( prefix.toString() ) == 0 ); + assert( entry.hasPrefix( GeoHash( "1100" ) ) ); + assert( entry.hasPrefix( prefix ) ); + } + + { + GeoHash a = g.hash( 50 , 50 ); + GeoHash b = g.hash( 48 , 54 ); + assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); + } + + + { + Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); + assert( b.inside( 29.763 , -95.363 ) ); + assert( ! b.inside( 32.9570255 , -96.1082497 ) ); + assert( ! 
b.inside( 32.9570255 , -96.1082497 , .01 ) );
+        }
+
+        {
+            GeoHash a( "11001111" );
+            assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) );
+            assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) );
+        }
+
+        {
+            int N = 10000;
+            {
+                Timer t;
+                for ( int i=0; i 2469 && dist < 2470 );
+            }
+
+            }
+        }
+    } geoUnitTest;
+
+    class GeoPoint {
+    public:
+        GeoPoint(){
+        }
+
+        GeoPoint( const KeyNode& node , double distance )
+            : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _distance( distance ){
+        }
+
+        GeoPoint( const BSONObj& key , DiskLoc loc , double distance )
+            : _key(key) , _loc(loc) , _o( loc.obj() ) , _distance( distance ){
+        }
+
+        bool operator<( const GeoPoint& other ) const {
+            return _distance < other._distance;
+        }
+
+        bool isEmpty() const {
+            return _o.isEmpty();
+        }
+
+        BSONObj _key;
+        DiskLoc _loc;
+        BSONObj _o;
+        double _distance;
+    };
+
+    class GeoAccumulator {
+    public:
+        GeoAccumulator( const Geo2dType * g , const BSONObj& filter )
+            : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) {
+            if ( ! filter.isEmpty() ){
+                _matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) );
+            }
+        }
+
+        virtual ~GeoAccumulator(){
+        }
+
+        virtual void add( const KeyNode& node ){
+            // when looking at other boxes, don't want to look at some object twice
+            pair<set<DiskLoc>::iterator,bool> seenBefore = _seen.insert( node.recordLoc );
+            if ( ! seenBefore.second ){
+                GEODEBUG( "\t\t\t\t already seen : " << node.recordLoc.obj()["_id"] );
+                return;
+            }
+            _lookedAt++;
+
+            // distance check
+            double d = 0;
+            if ( ! checkDistance( GeoHash( node.key.firstElement() ) , d ) ){
+                GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d );
+                return;
+            }
+            GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d );
+
+            // matcher
+            MatchDetails details;
+            if ( _matcher.get() ){
+                bool good = _matcher->matches( node.key , node.recordLoc , &details );
+                if ( details.loadedObject )
+                    _objectsLoaded++;
+
+                if ( ! good ){
+                    GEODEBUG( "\t\t\t\t didn't match : " << node.recordLoc.obj()["_id"] );
+                    return;
+                }
+            }
+
+            if ( ! details.loadedObject ) // don't double count
+                _objectsLoaded++;
+
+            addSpecific( node , d );
+            _found++;
+        }
+
+        virtual void addSpecific( const KeyNode& node , double d ) = 0;
+        virtual bool checkDistance( const GeoHash& node , double& d ) = 0;
+
+        long long found() const {
+            return _found;
+        }
+
+        const Geo2dType * _g;
+        set<DiskLoc> _seen;
+        auto_ptr<CoveredIndexMatcher> _matcher;
+
+        long long _lookedAt;
+        long long _objectsLoaded;
+        long long _found;
+    };
+
+    class GeoHopper : public GeoAccumulator {
+    public:
+        typedef multiset<GeoPoint> Holder;
+
+        GeoHopper( const Geo2dType * g , unsigned max , const GeoHash& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() )
+            : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ) {
+            _farthest = -1;
+        }
+
+        virtual bool checkDistance( const GeoHash& h , double& d ){
+            d = _g->distance( _near , h );
+            bool good = d < _maxDistance && ( _points.size() < _max || d < farthest() );
+            GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near << "\t" << h << "\t" << d
+                      << " ok: " << good << " farthest: " << farthest() );
+            return good;
+        }
+
+        virtual void addSpecific( const KeyNode& node , double d ){
+            GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d );
+            _points.insert( GeoPoint( node.key , node.recordLoc , d ) );
+            if ( _points.size() > _max ){
+                _points.erase( --_points.end() );
+            }
+
+            Holder::iterator i = _points.end();
+            i--;
+            _farthest = i->_distance;
+        }
+
+        double farthest() const {
+            return _farthest;
+        }
+
+        unsigned _max;
+        GeoHash _near;
+        Holder _points;
+        double _maxDistance;
+        double _farthest;
+    };
+
+    struct BtreeLocation {
+        int pos;
+        bool found;
+        DiskLoc bucket;
+
+        BSONObj key(){
+            if ( bucket.isNull() )
+                return BSONObj();
+            return bucket.btree()->keyNode( pos ).key;
+        }
+
+        bool hasPrefix( const GeoHash& hash ){
+            BSONElement e = key().firstElement();
+            if ( e.eoo() )
+                return false;
+            return GeoHash( e ).hasPrefix( hash );
+        }
+
+        bool advance( int direction , int& totalFound , GeoAccumulator* all ){
+
+            if ( bucket.isNull() )
+                return false;
+            bucket = bucket.btree()->advance( bucket , pos , direction , "btreelocation" );
+
+            if ( all )
+                return checkCur( totalFound , all );
+
+            return ! bucket.isNull();
+        }
+
+        bool checkCur( int& totalFound , GeoAccumulator* all ){
+            if ( bucket.isNull() )
+                return false;
+
+            if ( bucket.btree()->isUsed(pos) ){
+                totalFound++;
+                all->add( bucket.btree()->keyNode( pos ) );
+            }
+            else {
+                GEODEBUG( "\t\t\t\t not used: " << key() );
+            }
+
+            return true;
+        }
+
+        string toString(){
+            stringstream ss;
+            ss << "bucket: " << bucket.toString() << " pos: " << pos << " found: " << found;
+            return ss.str();
+        }
+
+        static bool initial( const IndexDetails& id , const Geo2dType * spec ,
+                             BtreeLocation& min , BtreeLocation& max ,
+                             GeoHash start ,
+                             int & found , GeoAccumulator * hopper )
+        {
+
+            Ordering ordering = Ordering::make(spec->_order);
+
+            min.bucket = id.head.btree()->locate( id , id.head , start.wrap() ,
+                                                  ordering , min.pos , min.found , minDiskLoc );
+            min.checkCur( found , hopper );
+            max = min;
+
+            if ( min.bucket.isNull() || ( !(hopper->found()) ) ){
+                min.bucket = id.head.btree()->locate( id , id.head , start.wrap() ,
+                                                      ordering , min.pos , min.found , minDiskLoc , -1 );
+                min.checkCur( found , hopper );
+            }
+
+            return ! min.bucket.isNull() || !
max.bucket.isNull(); + } + }; + + class GeoSearch { + public: + GeoSearch( const Geo2dType * g , const GeoHash& n , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits::max() ) + : _spec( g ) , _n( n ) , _start( n ) , + _numWanted( numWanted ) , _filter( filter ) , _maxDistance( maxDistance ) , + _hopper( new GeoHopper( g , numWanted , n , filter , maxDistance ) ) + { + assert( g->getDetails() ); + _nscanned = 0; + _found = 0; + } + + void exec(){ + const IndexDetails& id = *_spec->getDetails(); + + BtreeBucket * head = id.head.btree(); + assert( head ); + /* + * Search algorithm + * 1) use geohash prefix to find X items + * 2) compute max distance from want to an item + * 3) find optimal set of boxes that complete circle + * 4) use regular btree cursors to scan those boxes + */ + + GeoHopper * hopper = _hopper.get(); + + _prefix = _start; + { // 1 regular geo hash algorithm + + + BtreeLocation min,max; + if ( ! BtreeLocation::initial( id , _spec , min , max , _n , _found , hopper ) ) + return; + + while ( _hopper->found() < _numWanted ){ + GEODEBUG( _prefix << "\t" << _found << "\t DESC" ); + while ( min.hasPrefix( _prefix ) && min.advance( -1 , _found , hopper ) ) + _nscanned++; + GEODEBUG( _prefix << "\t" << _found << "\t ASC" ); + while ( max.hasPrefix( _prefix ) && max.advance( 1 , _found , hopper ) ) + _nscanned++; + if ( ! _prefix.constrains() ) + break; + _prefix = _prefix.up(); + + double temp = _spec->distance( _prefix , _start ); + if ( temp > ( _maxDistance * 2 ) ) + break; + } + } + GEODEBUG( "done part 1" ); + if ( _found && _prefix.constrains() ){ + // 2 + Point center( _spec , _n ); + double farthest = hopper->farthest(); + // Phase 1 might not have found any points. + if (farthest == -1) + farthest = _spec->sizeDiag( _prefix ); + Box want( center._x - farthest , center._y - farthest , farthest * 2 ); + _prefix = _n; + while ( _spec->sizeEdge( _prefix ) < ( farthest / 2 ) ){ + _prefix = _prefix.up(); + } + + if ( logLevel > 0 ){ + log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) + << " farthest: " << farthest << " using box: " << Box( _spec , _prefix ).toString() << endl; + } + + for ( int x=-1; x<=1; x++ ){ + for ( int y=-1; y<=1; y++ ){ + GeoHash toscan = _prefix; + toscan.move( x , y ); + + // 3 & 4 + doBox( id , want , toscan ); + } + } + } + GEODEBUG( "done search" ) + + } + + void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ){ + Box testBox( _spec , toscan ); + if ( logLevel > 2 ){ + cout << "\t"; + for ( int i=0; ilocate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , + loc.pos , loc.found , minDiskLoc ); + loc.checkCur( _found , _hopper.get() ); + while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ){ + _nscanned++; + if ( ++myscanned > 100 && goDeeper ){ + doBox( id , want , toscan + "00" , depth + 1); + doBox( id , want , toscan + "01" , depth + 1); + doBox( id , want , toscan + "10" , depth + 1); + doBox( id , want , toscan + "11" , depth + 1); + return; + } + } + + } + + + const Geo2dType * _spec; + + GeoHash _n; + GeoHash _start; + GeoHash _prefix; + int _numWanted; + BSONObj _filter; + double _maxDistance; + shared_ptr _hopper; + + long long _nscanned; + int _found; + }; + + class GeoCursorBase : public Cursor { + public: + GeoCursorBase( const Geo2dType * spec ) + : _spec( spec ), _id( _spec->getDetails() ){ + + } + + virtual DiskLoc refLoc(){ return DiskLoc(); } + + 
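+        // geo cursors do not participate in yielding or getMore: the
+        // noteLocation()/checkLocation() hooks below assert if they are
+        // ever called, and supportGetMore()/supportYields() report false.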
virtual BSONObj indexKeyPattern() {
+            return _spec->keyPattern();
+        }
+
+        virtual void noteLocation() {
+            assert(0);
+        }
+
+        /* called before query getmore block is iterated */
+        virtual void checkLocation() {
+            assert(0);
+        }
+
+        virtual bool supportGetMore() { return false; }
+        virtual bool supportYields() { return false; }
+
+        virtual bool getsetdup(DiskLoc loc){
+            return false;
+        }
+
+        const Geo2dType * _spec;
+        const IndexDetails * _id;
+    };
+
+    class GeoSearchCursor : public GeoCursorBase {
+    public:
+        GeoSearchCursor( shared_ptr<GeoSearch> s )
+            : GeoCursorBase( s->_spec ) ,
+              _s( s ) , _cur( s->_hopper->_points.begin() ) , _end( s->_hopper->_points.end() ) {
+        }
+
+        virtual ~GeoSearchCursor() {}
+
+        virtual bool ok(){
+            return _cur != _end;
+        }
+
+        virtual Record* _current(){ assert(ok()); return _cur->_loc.rec(); }
+        virtual BSONObj current(){ assert(ok()); return _cur->_o; }
+        virtual DiskLoc currLoc(){ assert(ok()); return _cur->_loc; }
+        virtual bool advance(){ _cur++; return ok(); }
+        virtual BSONObj currKey() const { return _cur->_key; }
+
+        virtual string toString() {
+            return "GeoSearchCursor";
+        }
+
+
+        virtual BSONObj prettyStartKey() const {
+            return BSON( _s->_spec->_geo << _s->_prefix.toString() );
+        }
+        virtual BSONObj prettyEndKey() const {
+            GeoHash temp = _s->_prefix;
+            temp.move( 1 , 1 );
+            return BSON( _s->_spec->_geo << temp.toString() );
+        }
+
+
+        shared_ptr<GeoSearch> _s;
+        GeoHopper::Holder::iterator _cur;
+        GeoHopper::Holder::iterator _end;
+    };
+
+    class GeoBrowse : public GeoCursorBase , public GeoAccumulator {
+    public:
+        GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() )
+            : GeoCursorBase( g ) , GeoAccumulator( g , filter ) ,
+              _type( type ) , _filter( filter ) , _firstCall(true) {
+        }
+
+        virtual string toString() {
+            return (string)"GeoBrowse-" + _type;
+        }
+
+        virtual bool ok(){
+            if ( _firstCall ){
+                fillStack();
+                _firstCall = false;
+            }
+            if ( ! _cur.isEmpty() || _stack.size() )
+                return true;
+
+            while ( moreToDo() ){
+                fillStack();
+                if ( ! _cur.isEmpty() )
+                    return true;
+            }
+
+            return false;
+        }
+
+        virtual bool advance(){
+            _cur._o = BSONObj();
+
+            if ( _stack.size() ){
+                _cur = _stack.front();
+                _stack.pop_front();
+                return true;
+            }
+
+            if ( ! moreToDo() )
+                return false;
+
+            while ( _cur.isEmpty() && moreToDo() )
+                fillStack();
+            return !
_cur.isEmpty(); + } + + virtual Record* _current(){ assert(ok()); return _cur._loc.rec(); } + virtual BSONObj current(){ assert(ok()); return _cur._o; } + virtual DiskLoc currLoc(){ assert(ok()); return _cur._loc; } + virtual BSONObj currKey() const { return _cur._key; } + + + virtual bool moreToDo() = 0; + virtual void fillStack() = 0; + + virtual void addSpecific( const KeyNode& node , double d ){ + if ( _cur.isEmpty() ) + _cur = GeoPoint( node , d ); + else + _stack.push_back( GeoPoint( node , d ) ); + } + + string _type; + BSONObj _filter; + list _stack; + + GeoPoint _cur; + bool _firstCall; + + }; + + class GeoCircleBrowse : public GeoBrowse { + public: + + enum State { + START , + DOING_EXPAND , + DOING_AROUND , + DONE + } _state; + + GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() ) + : GeoBrowse( g , "circle" , filter ){ + + uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 ); + BSONObjIterator i(circle); + _startPt = Point(i.next()); + _start = _startPt.hash(g); + _prefix = _start; + _maxDistance = i.next().numberDouble(); + uassert( 13061 , "need a max distance > 0 " , _maxDistance > 0 ); + _maxDistance += g->_error; + + _state = START; + _found = 0; + + ok(); + } + + virtual bool moreToDo(){ + return _state != DONE; + } + + virtual void fillStack(){ + if ( _state == START ){ + if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , + _prefix , _found , this ) ){ + _state = DONE; + return; + } + _state = DOING_EXPAND; + } + + + if ( _state == DOING_AROUND ){ + // TODO could rework and return rather than looping + for (int i=-1; i<=1; i++){ + for (int j=-1; j<=1; j++){ + if (i == 0 && j == 0) + continue; // main box + + GeoHash newBox = _prefix; + newBox.move(i, j); + + PREFIXDEBUG(newBox, _g); + if (needToCheckBox(newBox)){ + // TODO consider splitting into quadrants + getPointsForPrefix(newBox); + } else { + GEODEBUG("skipping box"); + } + } + } + + _state = DONE; + return; + } + + if (_state == DOING_EXPAND){ + GEODEBUG( "circle prefix [" << _prefix << "]" ); + PREFIXDEBUG(_prefix, _g); + + while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); + while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); + + if ( ! _prefix.constrains() ){ + GEODEBUG( "\t exhausted the btree" ); + _state = DONE; + return; + } + + + Point ll (_g, _prefix); + GeoHash trHash = _prefix; + trHash.move( 1 , 1 ); + Point tr (_g, trHash); + double sideLen = fabs(tr._x - ll._x); + + if (sideLen > _maxDistance){ // circle must be contained by surrounding squares + if ( (ll._x + _maxDistance < _startPt._x && ll._y + _maxDistance < _startPt._y) && + (tr._x - _maxDistance > _startPt._x && tr._y - _maxDistance > _startPt._y) ) + { + GEODEBUG("square fully contains circle"); + _state = DONE; + } else if (_prefix.getBits() > 1){ + GEODEBUG("checking surrounding squares"); + _state = DOING_AROUND; + } else { + GEODEBUG("using simple search"); + _prefix = _prefix.up(); + } + } else { + _prefix = _prefix.up(); + } + + return; + } + + /* Clients are expected to use moreToDo before calling + * fillStack, so DONE is checked for there. If any more + * State values are defined, you should handle them + * here. 
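+             * For reference, the states currently map to the phases above:
+             * START seeds the btree location at the hash of the circle's
+             * center, DOING_EXPAND widens that prefix until the circle is
+             * covered or the btree is exhausted, and DOING_AROUND scans the
+             * eight neighboring boxes before moving to DONE.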
*/ + assert(0); + } + + bool needToCheckBox(const GeoHash& prefix){ + Point ll (_g, prefix); + if (fabs(ll._x - _startPt._x) <= _maxDistance) return true; + if (fabs(ll._y - _startPt._y) <= _maxDistance) return true; + + GeoHash trHash = _prefix; + trHash.move( 1 , 1 ); + Point tr (_g, trHash); + + if (fabs(tr._x - _startPt._x) <= _maxDistance) return true; + if (fabs(tr._y - _startPt._y) <= _maxDistance) return true; + + return false; + } + + void getPointsForPrefix(const GeoHash& prefix){ + if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , prefix , _found , this ) ){ + return; + } + + while ( _min.hasPrefix( prefix ) && _min.advance( -1 , _found , this ) ); + while ( _max.hasPrefix( prefix ) && _max.advance( 1 , _found , this ) ); + } + + + virtual bool checkDistance( const GeoHash& h , double& d ){ + d = _g->distance( _start , h ); + GEODEBUG( "\t " << h << "\t" << d ); + return d <= _maxDistance; + } + + GeoHash _start; + Point _startPt; + double _maxDistance; + + int _found; + + GeoHash _prefix; + BtreeLocation _min; + BtreeLocation _max; + + }; + + class GeoBoxBrowse : public GeoBrowse { + public: + + enum State { + START , + DOING_EXPAND , + DONE + } _state; + + GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) + : GeoBrowse( g , "box" , filter ){ + + uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 ); + BSONObjIterator i(box); + _bl = g->_tohash( i.next() ); + _tr = g->_tohash( i.next() ); + + _want._min = Point( _g , _bl ); + _want._max = Point( _g , _tr ); + + uassert( 13064 , "need an area > 0 " , _want.area() > 0 ); + + _state = START; + _found = 0; + + Point center = _want.center(); + _prefix = _g->hash( center._x , center._y ); + + GEODEBUG( "center : " << center.toString() << "\t" << _prefix ); + + { + GeoHash a(0LL,32); + GeoHash b(0LL,32); + b.move(1,1); + _fudge = _g->distance(a,b); + } + + ok(); + } + + virtual bool moreToDo(){ + return _state != DONE; + } + + virtual void fillStack(){ + if ( _state == START ){ + + if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , + _prefix , _found , this ) ){ + _state = DONE; + return; + } + _state = DOING_EXPAND; + } + + if ( _state == DOING_EXPAND ){ + int started = _found; + while ( started == _found || _state == DONE ){ + GEODEBUG( "box prefix [" << _prefix << "]" ); + while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); + while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); + + if ( _state == DONE ) + return; + + if ( ! 
_prefix.constrains() ){ + GEODEBUG( "box exhausted" ); + _state = DONE; + return; + } + + Box cur( _g , _prefix ); + if ( cur._min._x + _fudge < _want._min._x && + cur._min._y + _fudge < _want._min._y && + cur._max._x - _fudge > _want._max._x && + cur._max._y - _fudge > _want._max._y ){ + + _state = DONE; + GeoHash temp = _prefix.commonPrefix( cur._max.hash( _g ) ); + + GEODEBUG( "box done : " << cur.toString() << " prefix:" << _prefix << " common:" << temp ); + + if ( temp == _prefix ) + return; + _prefix = temp; + GEODEBUG( "\t one more loop" ); + continue; + } + else { + _prefix = _prefix.up(); + } + } + return; + } + + } + + virtual bool checkDistance( const GeoHash& h , double& d ){ + bool res = _want.inside( Point( _g , h ) , _fudge ); + GEODEBUG( "\t want : " << _want.toString() + << " point: " << Point( _g , h ).toString() + << " in : " << res ); + return res; + } + + GeoHash _bl; + GeoHash _tr; + Box _want; + + int _found; + + GeoHash _prefix; + BtreeLocation _min; + BtreeLocation _max; + + double _fudge; + }; + + + shared_ptr Geo2dType::newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const { + if ( numWanted < 0 ) + numWanted = numWanted * -1; + else if ( numWanted == 0 ) + numWanted = 100; + + BSONObjIterator i(query); + while ( i.more() ){ + BSONElement e = i.next(); + + if ( _geo != e.fieldName() ) + continue; + + if ( e.type() != Object ) + continue; + + switch ( e.embeddedObject().firstElement().getGtLtOp() ){ + case BSONObj::opNEAR: { + BSONObj n = e.embeddedObject(); + e = n.firstElement(); + double maxDistance = numeric_limits::max(); + if ( e.isABSONObj() && e.embeddedObject().nFields() > 2 ){ + BSONObjIterator i(e.embeddedObject()); + i.next(); + i.next(); + BSONElement e = i.next(); + if ( e.isNumber() ) + maxDistance = e.numberDouble(); + } + { + BSONElement e = n["$maxDistance"]; + if ( e.isNumber() ) + maxDistance = e.numberDouble(); + } + shared_ptr s( new GeoSearch( this , _tohash(e) , numWanted , query , maxDistance ) ); + s->exec(); + shared_ptr c; + c.reset( new GeoSearchCursor( s ) ); + return c; + } + case BSONObj::opWITHIN: { + e = e.embeddedObject().firstElement(); + uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() ); + e = e.embeddedObject().firstElement(); + string type = e.fieldName(); + if ( type == "$center" ){ + uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() ); + shared_ptr c; + c.reset( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query ) ); + return c; + } + else if ( type == "$box" ){ + uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() ); + shared_ptr c; + c.reset( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); + return c; + } + throw UserException( 13058 , (string)"unknown $with type: " + type ); + } + default: + break; + } + } + + throw UserException( 13042 , (string)"missing geo field (" + _geo + ") in : " + query.toString() ); + } + + // ------ + // commands + // ------ + + class Geo2dFindNearCmd : public Command { + public: + Geo2dFindNearCmd() : Command( "geoNear" ){} + virtual LockType locktype() const { return READ; } + bool slaveOk() const { return true; } + void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; } + bool slaveOverrideOk() { return true; } + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + string ns = dbname + "." 
+ cmdObj.firstElement().valuestr(); + + NamespaceDetails * d = nsdetails( ns.c_str() ); + if ( ! d ){ + errmsg = "can't find ns"; + return false; + } + + vector idxs; + d->findIndexByType( GEO2DNAME , idxs ); + + if ( idxs.size() > 1 ){ + errmsg = "more than 1 geo indexes :("; + return false; + } + + if ( idxs.size() == 0 ){ + errmsg = "no geo index :("; + return false; + } + + int geoIdx = idxs[0]; + + result.append( "ns" , ns ); + + IndexDetails& id = d->idx( geoIdx ); + Geo2dType * g = (Geo2dType*)id.getSpec().getType(); + assert( &id == g->getDetails() ); + + int numWanted = 100; + if ( cmdObj["num"].isNumber() ) + numWanted = cmdObj["num"].numberInt(); + + uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo()); + const GeoHash n = g->_tohash( cmdObj["near"] ); + result.append( "near" , n.toString() ); + + BSONObj filter; + if ( cmdObj["query"].type() == Object ) + filter = cmdObj["query"].embeddedObject(); + + double maxDistance = numeric_limits::max(); + if ( cmdObj["maxDistance"].isNumber() ) + maxDistance = cmdObj["maxDistance"].number(); + + GeoSearch gs( g , n , numWanted , filter , maxDistance ); + + if ( cmdObj["start"].type() == String){ + GeoHash start ((string) cmdObj["start"].valuestr()); + gs._start = start; + } + + gs.exec(); + + double distanceMultiplier = 1; + if ( cmdObj["distanceMultiplier"].isNumber() ) + distanceMultiplier = cmdObj["distanceMultiplier"].number(); + + double totalDistance = 0; + + + BSONObjBuilder arr( result.subarrayStart( "results" ) ); + int x = 0; + for ( GeoHopper::Holder::iterator i=gs._hopper->_points.begin(); i!=gs._hopper->_points.end(); i++ ){ + const GeoPoint& p = *i; + + double dis = distanceMultiplier * p._distance; + totalDistance += dis; + + BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ).c_str() ) ); + bb.append( "dis" , dis ); + bb.append( "obj" , p._o ); + bb.done(); + } + arr.done(); + + BSONObjBuilder stats( result.subobjStart( "stats" ) ); + stats.append( "time" , cc().curop()->elapsedMillis() ); + stats.appendNumber( "btreelocs" , gs._nscanned ); + stats.appendNumber( "nscanned" , gs._hopper->_lookedAt ); + stats.appendNumber( "objectsLoaded" , gs._hopper->_objectsLoaded ); + stats.append( "avgDistance" , totalDistance / x ); + stats.append( "maxDistance" , gs._hopper->farthest() ); + stats.done(); + + return true; + } + + } geo2dFindNearCmd; + + class GeoWalkCmd : public Command { + public: + GeoWalkCmd() : Command( "geoWalk" ){} + virtual LockType locktype() const { return READ; } + bool slaveOk() const { return true; } + bool slaveOverrideOk() { return true; } + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + string ns = dbname + "." + cmdObj.firstElement().valuestr(); + + NamespaceDetails * d = nsdetails( ns.c_str() ); + if ( ! 
d ){ + errmsg = "can't find ns"; + return false; + } + + int geoIdx = -1; + { + NamespaceDetails::IndexIterator ii = d->ii(); + while ( ii.more() ){ + IndexDetails& id = ii.next(); + if ( id.getSpec().getTypeName() == GEO2DNAME ){ + if ( geoIdx >= 0 ){ + errmsg = "2 geo indexes :("; + return false; + } + geoIdx = ii.pos() - 1; + } + } + } + + if ( geoIdx < 0 ){ + errmsg = "no geo index :("; + return false; + } + + + IndexDetails& id = d->idx( geoIdx ); + Geo2dType * g = (Geo2dType*)id.getSpec().getType(); + assert( &id == g->getDetails() ); + + int max = 100000; + + BtreeCursor c( d , geoIdx , id , BSONObj() , BSONObj() , true , 1 ); + while ( c.ok() && max-- ){ + GeoHash h( c.currKey().firstElement() ); + int len; + cout << "\t" << h.toString() + << "\t" << c.current()[g->_geo] + << "\t" << hex << h.getHash() + << "\t" << hex << ((long long*)c.currKey().firstElement().binData(len))[0] + << "\t" << c.current()["_id"] + << endl; + c.advance(); + } + + return true; + } + + } geoWalkCmd; + +} diff -Nru mongodb-1.4.4/db/geo/core.h mongodb-1.6.3/db/geo/core.h --- mongodb-1.4.4/db/geo/core.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/geo/core.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,427 @@ +// core.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../../pch.h" +#include "../jsobj.h" + +#include + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +namespace mongo { + + class GeoBitSets { + public: + GeoBitSets(){ + for ( int i=0; i<32; i++ ){ + masks32[i] = ( 1 << ( 31 - i ) ); + } + for ( int i=0; i<64; i++ ){ + masks64[i] = ( 1LL << ( 63 - i ) ); + } + + for ( unsigned i=0; i<16; i++ ){ + unsigned fixed = 0; + for ( int j=0; j<4; j++ ){ + if ( i & ( 1 << j ) ) + fixed |= ( 1 << ( j * 2 ) ); + } + hashedToNormal[fixed] = i; + } + + } + int masks32[32]; + long long masks64[64]; + + unsigned hashedToNormal[256]; + }; + + extern GeoBitSets geoBitSets; + + class GeoHash { + public: + GeoHash() + : _hash(0),_bits(0){ + } + + explicit GeoHash( const char * hash ){ + init( hash ); + } + + explicit GeoHash( const string& hash ){ + init( hash ); + } + + explicit GeoHash( const BSONElement& e , unsigned bits=32 ){ + _bits = bits; + if ( e.type() == BinData ){ + int len = 0; + _copy( (char*)&_hash , e.binData( len ) ); + assert( len == 8 ); + _bits = bits; + } + else { + cout << "GeoHash cons e : " << e << endl; + uassert(13047,"wrong type for geo index. 
if you're using a pre-release version, need to rebuild index",0); + } + _fix(); + } + + GeoHash( unsigned x , unsigned y , unsigned bits=32){ + init( x , y , bits ); + } + + GeoHash( const GeoHash& old ){ + _hash = old._hash; + _bits = old._bits; + } + + GeoHash( long long hash , unsigned bits ) + : _hash( hash ) , _bits( bits ){ + _fix(); + } + + void init( unsigned x , unsigned y , unsigned bits ){ + assert( bits <= 32 ); + _hash = 0; + _bits = bits; + for ( unsigned i=0; i> 1 ) & 0x55; + x |= ( geoBitSets.hashedToNormal[t] << (4*(i)) ); + } + } + + void unhash_slow( unsigned& x , unsigned& y ) const { + x = 0; + y = 0; + for ( unsigned i=0; i<_bits; i++ ){ + if ( getBitX(i) ) + x |= geoBitSets.masks32[i]; + if ( getBitY(i) ) + y |= geoBitSets.masks32[i]; + } + } + + void unhash( unsigned& x , unsigned& y ) const { + unhash_fast( x , y ); + } + + /** + * @param 0 = high + */ + static bool isBitSet( unsigned val , unsigned bit ){ + return geoBitSets.masks32[bit] & val; + } + + GeoHash up() const { + return GeoHash( _hash , _bits - 1 ); + } + + bool hasPrefix( const GeoHash& other ) const { + assert( other._bits <= _bits ); + if ( other._bits == 0 ) + return true; + long long x = other._hash ^ _hash; + x = x >> (64-(other._bits*2)); + return x == 0; + } + + + string toString() const { + StringBuilder buf( _bits * 2 ); + for ( unsigned x=0; x<_bits*2; x++ ) + buf.append( _hash & geoBitSets.masks64[x] ? "1" : "0" ); + return buf.str(); + } + + string toStringHex1() const { + stringstream ss; + ss << hex << _hash; + return ss.str(); + } + + void init( const string& s ){ + _hash = 0; + _bits = s.size() / 2; + for ( unsigned pos=0; pos 0; + } + + void move( int x , int y ){ + assert( _bits ); + _move( 0 , x ); + _move( 1 , y ); + } + + void _move( unsigned offset , int d ){ + if ( d == 0 ) + return; + assert( d <= 1 && d>= -1 ); // TEMP + + bool from, to; + if ( d > 0 ){ + from = 0; + to = 1; + } + else { + from = 1; + to = 0; + } + + unsigned pos = ( _bits * 2 ) - 1; + if ( offset == 0 ) + pos--; + while ( true ){ + if ( getBit(pos) == from ){ + setBit( pos , to ); + return; + } + + if ( pos < 2 ){ + // overflow + for ( ; pos < ( _bits * 2 ) ; pos += 2 ){ + setBit( pos , from ); + } + return; + } + + setBit( pos , from ); + pos -= 2; + } + + assert(0); + } + + GeoHash& operator=(const GeoHash& h) { + _hash = h._hash; + _bits = h._bits; + return *this; + } + + bool operator==(const GeoHash& h ){ + return _hash == h._hash && _bits == h._bits; + } + + GeoHash& operator+=( const char * s ) { + unsigned pos = _bits * 2; + _bits += strlen(s) / 2; + assert( _bits <= 32 ); + while ( s[0] ){ + if ( s[0] == '1' ) + setBit( pos , 1 ); + pos++; + s++; + } + + return *this; + } + + GeoHash operator+( const char * s ) const { + GeoHash n = *this; + n+=s; + return n; + } + + void _fix(){ + static long long FULL = 0xFFFFFFFFFFFFFFFFLL; + long long mask = FULL << ( 64 - ( _bits * 2 ) ); + _hash &= mask; + } + + void append( BSONObjBuilder& b , const char * name ) const { + char buf[8]; + _copy( buf , (char*)&_hash ); + b.appendBinData( name , 8 , bdtCustom , buf ); + } + + long long getHash() const { + return _hash; + } + + unsigned getBits() const { + return _bits; + } + + GeoHash commonPrefix( const GeoHash& other ) const { + unsigned i=0; + for ( ; i<_bits && iunhash( hash , _x , _y ); + } + + explicit Point( const BSONElement& e ){ + BSONObjIterator i(e.Obj()); + _x = i.next().number(); + _y = i.next().number(); + } + + explicit Point( const BSONObj& o ){ + BSONObjIterator i(o); + _x = i.next().number(); + 
_y = i.next().number();
+        }
+
+        Point( double x , double y )
+            : _x( x ) , _y( y ){
+        }
+
+        Point() : _x(0),_y(0){
+        }
+
+        GeoHash hash( const GeoConvert * g ){
+            return g->hash( _x , _y );
+        }
+
+        double distance( const Point& p ) const {
+            double a = _x - p._x;
+            double b = _y - p._y;
+            return sqrt( ( a * a ) + ( b * b ) );
+        }
+
+        string toString() const {
+            StringBuilder buf(32);
+            buf << "(" << _x << "," << _y << ")";
+            return buf.str();
+
+        }
+
+        double _x;
+        double _y;
+    };
+
+
+    extern double EARTH_RADIUS_KM;
+    extern double EARTH_RADIUS_MILES;
+
+    // WARNING: _x and _y MUST be longitude and latitude in that order
+    // note: multiply by earth radius for distance
+    inline double spheredist_rad( const Point& p1, const Point& p2 ) {
+        // this uses the n-vector formula: http://en.wikipedia.org/wiki/N-vector
+        // If you try to match the code to the formula, note that the dot product of
+        // the two unit n-vectors is inlined below (despite its name, cross_prod
+        // holds the dot product); acos of it gives the central angle.
+        // TODO: optimize with SSE
+
+        double sin_x1(sin(p1._x)), cos_x1(cos(p1._x));
+        double sin_y1(sin(p1._y)), cos_y1(cos(p1._y));
+        double sin_x2(sin(p2._x)), cos_x2(cos(p2._x));
+        double sin_y2(sin(p2._y)), cos_y2(cos(p2._y));
+
+        double cross_prod =
+            (cos_y1*cos_x1 * cos_y2*cos_x2) +
+            (cos_y1*sin_x1 * cos_y2*sin_x2) +
+            (sin_y1        * sin_y2);
+
+        return acos(cross_prod);
+    }
+
+    // note: return is still in radians as that can be multiplied by radius to get arc length
+    inline double spheredist_deg( const Point& p1, const Point& p2 ) {
+        return spheredist_rad(
+            Point( p1._x * (M_PI/180), p1._y * (M_PI/180)),
+            Point( p2._x * (M_PI/180), p2._y * (M_PI/180))
+        );
+    }
+
+}
diff -Nru mongodb-1.4.4/db/geo/haystack.cpp mongodb-1.6.3/db/geo/haystack.cpp
--- mongodb-1.4.4/db/geo/haystack.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/geo/haystack.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,317 @@
+// db/geo/haystack.cpp
+
+/**
+ * Copyright (C) 2008 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pch.h"
+#include "../namespace.h"
+#include "../jsobj.h"
+#include "../index.h"
+#include "../../util/unittest.h"
+#include "../commands.h"
+#include "../pdfile.h"
+#include "../btree.h"
+#include "../curop.h"
+#include "../matcher.h"
+#include "core.h"
+
+#define GEOQUADDEBUG(x)
+//#define GEOQUADDEBUG(x) cout << x << endl
+
+/**
+ * this is a geo-based search piece, which is different from the regular geo lookup
+ * it is useful when you want to look for something within a region where the ratio of matches is low
+ * works well for searching for restaurants within 25 miles with a certain name
+ * should not be used for finding the closest restaurants that are open
+ */
+namespace mongo {
+
+    string GEOSEARCHNAME = "geoHaystack";
+
+    class GeoHaystackSearchHopper {
+    public:
+        GeoHaystackSearchHopper( const BSONObj& n , double maxDistance , unsigned limit , const string& geoField )
+            : _near( n ) , _maxDistance( maxDistance ) , _limit( limit ) , _geoField(geoField){
+
+        }
+
+        void got( const DiskLoc& loc ){
+            Point p( loc.obj().getFieldDotted( _geoField ) );
+            if ( _near.distance( p ) > _maxDistance )
+                return;
+            _locs.push_back( loc );
+        }
+
+        int append( BSONArrayBuilder& b ){
+            for ( unsigned i=0; i<_locs.size() && i<_limit; i++ )
+                b.append( _locs[i].obj() );
+            return _locs.size();
+        }
+
+        Point _near;
+        double _maxDistance;
+        unsigned _limit;
+        string _geoField;
+
+        vector<DiskLoc> _locs;
+    };
+
+    class GeoHaystackSearchIndex : public IndexType {
+
+    public:
+
+        GeoHaystackSearchIndex( const IndexPlugin* plugin , const IndexSpec* spec )
+            : IndexType( plugin , spec ){
+
+            BSONElement e = spec->info["bucketSize"];
+            uassert( 13321 , "need bucketSize" , e.isNumber() );
+            _bucketSize = e.numberDouble();
+
+            BSONObjBuilder orderBuilder;
+
+            BSONObjIterator i( spec->keyPattern );
+            while ( i.more() ){
+                BSONElement e = i.next();
+                if ( e.type() == String && GEOSEARCHNAME == e.valuestr() ){
+                    uassert( 13314 , "can't have 2 geo fields" , _geo.size() == 0 );
+                    uassert( 13315 , "2d has to be first in index" , _other.size() == 0 );
+                    _geo = e.fieldName();
+                }
+                else {
+                    _other.push_back( e.fieldName() );
+                }
+                orderBuilder.append( "" , 1 );
+            }
+
+            uassert( 13316 , "no geo field specified" , _geo.size() );
+            uassert( 13317 , "no other fields specified" , _other.size() );
+            uassert( 13326 , "quadrant search can only have 1 other field for now" , _other.size() == 1 );
+            _order = orderBuilder.obj();
+        }
+
+        int hash( const BSONElement& e ) const {
+            uassert( 13322 , "not a number" , e.isNumber() );
+            return hash( e.numberDouble() );
+        }
+
+        int hash( double d ) const {
+            d += 180;
+            d /= _bucketSize;
+            return (int)d;
+        }
+
+        string makeString( int hashedX , int hashedY ) const {
+            stringstream ss;
+            ss << hashedX << "_" << hashedY;
+            return ss.str();
+        }
+
+        void _add( const BSONObj& obj, const string& root , const BSONElement& e , BSONObjSetDefaultOrder& keys ) const {
+            BSONObjBuilder buf;
+            buf.append( "" , root );
+            if ( e.eoo() )
+                buf.appendNull( "" );
+            else
+                buf.appendAs( e , "" );
+
+            BSONObj key = buf.obj();
+            GEOQUADDEBUG( obj << "\n\t" << root << "\n\t" << key );
+            keys.insert( key );
+        }
+
+        void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+
+            BSONElement loc = obj.getFieldDotted( _geo );
+            if ( loc.eoo() )
+                return;
+
+            uassert( 13323 , "latlng not an array" , loc.isABSONObj() );
+            string root;
+            {
+                BSONObjIterator i( loc.Obj() );
+                BSONElement x = i.next();
+                BSONElement y = i.next();
+                root = makeString( hash(x) , hash(y) );
+            }
+
+
+            assert( _other.size() == 1
); + + BSONElementSet all; + obj.getFieldsDotted( _other[0] , all ); + + if ( all.size() == 0 ){ + _add( obj , root , BSONElement() , keys ); + } + else { + for ( BSONElementSet::iterator i=all.begin(); i!=all.end(); ++i ){ + _add( obj , root , *i , keys ); + } + } + + } + + shared_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const { + shared_ptr c; + assert(0); + return c; + } + + void searchCommand( NamespaceDetails* nsd , int idxNo , + const BSONObj& n /*near*/ , double maxDistance , const BSONObj& search , + BSONObjBuilder& result , unsigned limit ){ + + Timer t; + + log(1) << "SEARCH near:" << n << " maxDistance:" << maxDistance << " search: " << search << endl; + int x,y; + { + BSONObjIterator i( n ); + x = hash( i.next() ); + y = hash( i.next() ); + } + int scale = (int)ceil( maxDistance / _bucketSize ); + + GeoHaystackSearchHopper hopper(n,maxDistance,limit,_geo); + + long long btreeMatches = 0; + + for ( int a=-scale; a<=scale; a++ ){ + for ( int b=-scale; b<=scale; b++ ){ + + BSONObjBuilder bb; + bb.append( "" , makeString( x + a , y + b ) ); + for ( unsigned i=0; i<_other.size(); i++ ){ + BSONElement e = search.getFieldDotted( _other[i] ); + if ( e.eoo() ) + bb.appendNull( "" ); + else + bb.appendAs( e , "" ); + } + + BSONObj key = bb.obj(); + + GEOQUADDEBUG( "KEY: " << key ); + + set thisPass; + BtreeCursor cursor( nsd , idxNo , *getDetails() , key , key , true , 1 ); + while ( cursor.ok() ){ + pair::iterator, bool> p = thisPass.insert( cursor.currLoc() ); + if ( p.second ){ + hopper.got( cursor.currLoc() ); + GEOQUADDEBUG( "\t" << cursor.current() ); + btreeMatches++; + } + cursor.advance(); + } + } + + } + + BSONArrayBuilder arr( result.subarrayStart( "results" ) ); + int num = hopper.append( arr ); + arr.done(); + + { + BSONObjBuilder b( result.subobjStart( "stats" ) ); + b.append( "time" , t.millis() ); + b.appendNumber( "btreeMatches" , btreeMatches ); + b.append( "n" , num ); + b.done(); + } + } + + const IndexDetails* getDetails() const { + return _spec->getDetails(); + } + + string _geo; + vector _other; + + BSONObj _order; + + double _bucketSize; + }; + + class GeoHaystackSearchIndexPlugin : public IndexPlugin { + public: + GeoHaystackSearchIndexPlugin() : IndexPlugin( GEOSEARCHNAME ){ + } + + virtual IndexType* generate( const IndexSpec* spec ) const { + return new GeoHaystackSearchIndex( this , spec ); + } + + } nameIndexPlugin; + + + class GeoHaystackSearchCommand : public Command { + public: + GeoHaystackSearchCommand() : Command( "geoSearch" ){} + virtual LockType locktype() const { return READ; } + bool slaveOk() const { return true; } + bool slaveOverrideOk() const { return true; } + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + + string ns = dbname + "." + cmdObj.firstElement().valuestr(); + + NamespaceDetails * d = nsdetails( ns.c_str() ); + if ( ! 
d ){ + errmsg = "can't find ns"; + return false; + } + + vector idxs; + d->findIndexByType( GEOSEARCHNAME , idxs ); + if ( idxs.size() == 0 ){ + errmsg = "no geoSearch index"; + return false; + } + if ( idxs.size() > 1 ){ + errmsg = "more than 1 geosearch index"; + return false; + } + + int idxNum = idxs[0]; + + IndexDetails& id = d->idx( idxNum ); + GeoHaystackSearchIndex * si = (GeoHaystackSearchIndex*)id.getSpec().getType(); + assert( &id == si->getDetails() ); + + BSONElement n = cmdObj["near"]; + BSONElement maxDistance = cmdObj["maxDistance"]; + BSONElement search = cmdObj["search"]; + + uassert( 13318 , "near needs to be an array" , n.isABSONObj() ); + uassert( 13319 , "maxDistance needs a number" , maxDistance.isNumber() ); + uassert( 13320 , "search needs to be an object" , search.type() == Object ); + + unsigned limit = 50; + if ( cmdObj["limit"].isNumber() ) + limit = (unsigned)cmdObj["limit"].numberInt(); + + si->searchCommand( d , idxNum , n.Obj() , maxDistance.numberDouble() , search.Obj() , result , limit ); + + return 1; + } + + } nameSearchCommand; + + + + + +} diff -Nru mongodb-1.4.4/db/helpers/dblogger.h mongodb-1.6.3/db/helpers/dblogger.h --- mongodb-1.4.4/db/helpers/dblogger.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/helpers/dblogger.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,31 @@ +// @file db.logger.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#pragma once + +namespace mongo { + + /** helper to log (and read log) of a capped collection in the database */ + class DBLogger { + bool _inited; + public: + const string _ns; + DBLogger(string ns) : _inited(false), _ns(ns){ } + }; + +} diff -Nru mongodb-1.4.4/db/index.cpp mongodb-1.6.3/db/index.cpp --- mongodb-1.4.4/db/index.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/index.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "namespace.h" #include "index.h" #include "btree.h" @@ -25,32 +25,6 @@ namespace mongo { - map * IndexPlugin::_plugins; - - IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec ) - : _plugin( plugin ) , _spec( spec ){ - - } - - IndexType::~IndexType(){ - } - - const BSONObj& IndexType::keyPattern() const { - return _spec->keyPattern; - } - - IndexPlugin::IndexPlugin( const string& name ) - : _name( name ){ - if ( ! 
_plugins ) - _plugins = new map(); - (*_plugins)[name] = this; - } - - int IndexType::compare( const BSONObj& l , const BSONObj& r ) const { - return l.woCompare( r , _spec->keyPattern ); - } - - int removeFromSysIndexes(const char *ns, const char *idxName) { string system_indexes = cc().database()->name + ".system.indexes"; BSONObjBuilder b; @@ -111,172 +85,6 @@ wassert( n == 1 ); } - void IndexSpec::reset( const IndexDetails * details ){ - _details = details; - reset( details->info ); - } - - void IndexSpec::reset( const DiskLoc& loc ){ - info = loc.obj(); - keyPattern = info["key"].embeddedObjectUserCheck(); - if ( keyPattern.objsize() == 0 ) { - out() << info.toString() << endl; - assert(false); - } - _init(); - } - - - void IndexSpec::_init(){ - assert( keyPattern.objsize() ); - - string pluginName = ""; - - BSONObjIterator i( keyPattern ); - BSONObjBuilder nullKeyB; - while( i.more() ) { - BSONElement e = i.next(); - _fieldNames.push_back( e.fieldName() ); - _fixed.push_back( BSONElement() ); - nullKeyB.appendNull( "" ); - if ( e.type() == String ){ - uassert( 13007 , "can only have 1 index plugin / bad index key pattern" , pluginName.size() == 0 ); - pluginName = e.valuestr(); - } - - } - - _nullKey = nullKeyB.obj(); - - BSONObjBuilder b; - b.appendNull( "" ); - _nullObj = b.obj(); - _nullElt = _nullObj.firstElement(); - - if ( pluginName.size() ){ - IndexPlugin * plugin = IndexPlugin::get( pluginName ); - if ( ! plugin ){ - log() << "warning: can't find plugin [" << pluginName << "]" << endl; - } - else { - _indexType.reset( plugin->generate( this ) ); - } - } - _finishedInit = true; - } - - - void IndexSpec::getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - if ( _indexType.get() ){ - _indexType->getKeys( obj , keys ); - return; - } - vector fieldNames( _fieldNames ); - vector fixed( _fixed ); - _getKeys( fieldNames , fixed , obj, keys ); - if ( keys.empty() ) - keys.insert( _nullKey ); - } - - void IndexSpec::_getKeys( vector fieldNames , vector fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - BSONElement arrElt; - unsigned arrIdx = ~0; - for( unsigned i = 0; i < fieldNames.size(); ++i ) { - if ( *fieldNames[ i ] == '\0' ) - continue; - BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); - if ( e.eoo() ) - e = _nullElt; // no matching field - if ( e.type() != Array ) - fieldNames[ i ] = ""; // no matching field or non-array match - if ( *fieldNames[ i ] == '\0' ) - fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) - if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here - arrIdx = i; - arrElt = e; - } - // enforce single array path here - uassert( 10088 , "cannot index parallel arrays", e.type() != Array || e.rawdata() == arrElt.rawdata() ); - } - - bool allFound = true; // have we found elements for all field names in the key spec? 
- for( vector::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ){ - if ( **i != '\0' ){ - allFound = false; - break; - } - } - - bool insertArrayNull = false; - - if ( allFound ) { - if ( arrElt.eoo() ) { - // no terminal array element to expand - BSONObjBuilder b(_sizeTracker); - for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) - b.appendAs( *i, "" ); - keys.insert( b.obj() ); - } - else { - // terminal array element to expand, so generate all keys - BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ){ - while( i.more() ) { - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ) - b.appendAs( i.next(), "" ); - else - b.appendAs( fixed[ j ], "" ); - } - keys.insert( b.obj() ); - } - } - else if ( fixed.size() > 1 ){ - insertArrayNull = true; - } - } - } else { - // nonterminal array element to expand, so recurse - assert( !arrElt.eoo() ); - BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ){ - while( i.more() ) { - BSONElement e = i.next(); - if ( e.type() == Object ) - _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); - } - } - else { - insertArrayNull = true; - } - } - - if ( insertArrayNull ){ - // x : [] - need to insert undefined - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ){ - b.appendUndefined( "" ); - } - else { - BSONElement e = fixed[j]; - if ( e.eoo() ) - b.appendNull( "" ); - else - b.appendAs( e , "" ); - } - } - keys.insert( b.obj() ); - } - - } - - /* Pull out the relevant key objects from obj, so we - can index them. Note that the set is multiple elements - only when it's a "multikey" array. - Keys will be left empty if key not found in the object. - */ void IndexDetails::getKeysFromObject( const BSONObj& obj, BSONObjSetDefaultOrder& keys) const { getSpec().getKeys( obj, keys ); } @@ -297,7 +105,7 @@ } } - void getIndexChanges(vector& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj) { + void getIndexChanges(vector& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj, bool &changedId) { int z = d.nIndexesBeingBuilt(); v.resize(z); NamespaceDetails::IndexIterator i = d.ii(); @@ -311,6 +119,9 @@ d.setIndexIsMultikey(i); setDifference(ch.oldkeys, ch.newkeys, ch.removed); setDifference(ch.newkeys, ch.oldkeys, ch.added); + if ( ch.removed.size() > 0 && ch.added.size() > 0 && idx.isIdIndex() ) { + changedId = true; + } } } @@ -390,7 +201,7 @@ return false; } sourceCollection = nsdetails(sourceNS.c_str()); - log() << "info: creating collection " << sourceNS << " on add index\n"; + tlog() << "info: creating collection " << sourceNS << " on add index\n"; assert( sourceCollection ); } @@ -422,40 +233,20 @@ return true; } - bool anyElementNamesMatch( const BSONObj& a , const BSONObj& b ){ - BSONObjIterator x(a); - while ( x.more() ){ - BSONElement e = x.next(); - BSONObjIterator y(b); - while ( y.more() ){ - BSONElement f = y.next(); - FieldCompareResult res = compareDottedFieldNames( e.fieldName() , f.fieldName() ); - if ( res == SAME || res == LEFT_SUBFIELD || res == RIGHT_SUBFIELD ) - return true; - } - } - return false; - } - - IndexSuitability IndexSpec::suitability( const BSONObj& query , const BSONObj& order ) const { - if ( _indexType.get() ) - return _indexType->suitability( query , order ); - return _suitability( query , order ); - } - - IndexSuitability IndexSpec::_suitability( const BSONObj& query , const BSONObj& order ) const { - // TODO: optimize - if ( anyElementNamesMatch( 
keyPattern , query ) == 0 && anyElementNamesMatch( keyPattern , order ) == 0 ) - return USELESS; - return HELPFUL; - } - IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const { - return _spec->_suitability( query , order ); + void IndexSpec::reset( const IndexDetails * details ){ + _details = details; + reset( details->info ); } - bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const { - return ! order.isEmpty(); + void IndexSpec::reset( const DiskLoc& loc ){ + info = loc.obj(); + keyPattern = info["key"].embeddedObjectUserCheck(); + if ( keyPattern.objsize() == 0 ) { + out() << info.toString() << endl; + assert(false); + } + _init(); } } diff -Nru mongodb-1.4.4/db/index_geo2d.cpp mongodb-1.6.3/db/index_geo2d.cpp --- mongodb-1.4.4/db/index_geo2d.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/index_geo2d.cpp 1969-12-31 16:00:00.000000000 -0800 @@ -1,1694 +0,0 @@ -// geo2d.cpp - -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see . -*/ - -#include "stdafx.h" -#include "namespace.h" -#include "jsobj.h" -#include "index.h" -#include "../util/unittest.h" -#include "commands.h" -#include "pdfile.h" -#include "btree.h" -#include "curop.h" -#include "matcher.h" - -//#define GEODEBUG(x) cout << x << endl; -#define GEODEBUG(x) - -namespace mongo { - - const string GEO2DNAME = "2d"; - - class GeoBitSets { - public: - GeoBitSets(){ - for ( int i=0; i<32; i++ ){ - masks32[i] = ( 1 << ( 31 - i ) ); - } - for ( int i=0; i<64; i++ ){ - masks64[i] = ( 1LL << ( 63 - i ) ); - } - } - int masks32[32]; - long long masks64[64]; - } geoBitSets; - - - class GeoHash { - public: - GeoHash() - : _hash(0),_bits(0){ - } - - GeoHash( const char * hash ){ - init( hash ); - } - - GeoHash( const string& hash ){ - init( hash ); - } - - GeoHash( const BSONElement& e , unsigned bits=32 ){ - _bits = bits; - if ( e.type() == BinData ){ - int len = 0; - _copy( (char*)&_hash , e.binData( len ) ); - assert( len == 8 ); - _bits = bits; - } - else { - cout << "GeoHash cons e : " << e << endl; - uassert(13047,"wrong type for geo index. if you're using a pre-release version, need to rebuild index",0); - } - _fix(); - } - - GeoHash( unsigned x , unsigned y , unsigned bits=32){ - init( x , y , bits ); - } - - GeoHash( const GeoHash& old ){ - _hash = old._hash; - _bits = old._bits; - } - - GeoHash( long long hash , unsigned bits ) - : _hash( hash ) , _bits( bits ){ - _fix(); - } - - void init( unsigned x , unsigned y , unsigned bits ){ - assert( bits <= 32 ); - _hash = 0; - _bits = bits; - for ( unsigned i=0; i> (64-(other._bits*2)); - return x == 0; - } - - - string toString() const { - StringBuilder buf( _bits * 2 ); - for ( unsigned x=0; x<_bits*2; x++ ) - buf.append( _hash & geoBitSets.masks64[x] ? 
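/* Aside: the hashing scheme in miniature. Bit i of x (counted from the most
   significant bit) lands at hash bit 2*i and bit i of y at 2*i+1, which is
   what the masks32/masks64 tables above implement; hasPrefix() is then just
   an XOR and a shift. A standalone sketch of the scheme, not the class itself:

       #include <cstdio>
       typedef unsigned long long u64;

       u64 interleave( unsigned x , unsigned y , unsigned bits ) {
           u64 h = 0;
           for ( unsigned i = 0; i < bits; i++ ) {
               if ( x & ( 1u << ( 31 - i ) ) ) h |= 1ULL << ( 63 - 2 * i );
               if ( y & ( 1u << ( 31 - i ) ) ) h |= 1ULL << ( 63 - ( 2 * i + 1 ) );
           }
           return h;
       }

       bool hasPrefix( u64 hash , u64 prefix , unsigned prefixBits ) {
           return ( ( hash ^ prefix ) >> ( 64 - prefixBits * 2 ) ) == 0;
       }

       int main() {
           u64 h = interleave( 0x80000000u , 0u , 32 );                  // leading "10"
           printf( "%d\n" , hasPrefix( h , h & ( ~0ULL << 62 ) , 1 ) );  // prints 1
           return 0;
       }
*/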
"1" : "0" ); - return buf.str(); - } - - string toStringHex1() const { - stringstream ss; - ss << hex << _hash; - return ss.str(); - } - - void init( const string& s ){ - _hash = 0; - _bits = s.size() / 2; - for ( unsigned pos=0; pos 0; - } - - void move( int x , int y ){ - assert( _bits ); - _move( 0 , x ); - _move( 1 , y ); - } - - void _move( unsigned offset , int d ){ - if ( d == 0 ) - return; - assert( d <= 1 && d>= -1 ); // TEMP - - bool from, to; - if ( d > 0 ){ - from = 0; - to = 1; - } - else { - from = 1; - to = 0; - } - - unsigned pos = ( _bits * 2 ) - 1; - if ( offset == 0 ) - pos--; - while ( true ){ - if ( getBit(pos) == from ){ - setBit( pos , to ); - return; - } - - if ( pos < 2 ){ - // overflow - for ( ; pos < ( _bits * 2 ) ; pos += 2 ){ - setBit( pos , from ); - } - return; - } - - setBit( pos , from ); - pos -= 2; - } - - assert(0); - } - - GeoHash& operator=(const GeoHash& h) { - _hash = h._hash; - _bits = h._bits; - return *this; - } - - bool operator==(const GeoHash& h ){ - return _hash == h._hash && _bits == h._bits; - } - - GeoHash& operator+=( const char * s ) { - unsigned pos = _bits * 2; - _bits += strlen(s) / 2; - assert( _bits <= 32 ); - while ( s[0] ){ - if ( s[0] == '1' ) - setBit( pos , 1 ); - pos++; - s++; - } - - return *this; - } - - GeoHash operator+( const char * s ) const { - GeoHash n = *this; - n+=s; - return n; - } - - void _fix(){ - if ( ( _hash << ( _bits * 2 ) ) == 0 ) - return; - long long mask = 0; - for ( unsigned i=0; i<_bits*2; i++ ) - mask |= geoBitSets.masks64[i]; - _hash &= mask; - } - - void append( BSONObjBuilder& b , const char * name ) const { - char buf[8]; - _copy( buf , (char*)&_hash ); - b.appendBinData( name , 8 , bdtCustom , buf ); - } - - long long getHash() const { - return _hash; - } - - GeoHash commonPrefix( const GeoHash& other ) const { - unsigned i=0; - for ( ; i<_bits && ikeyPattern ); - while ( i.more() ){ - BSONElement e = i.next(); - if ( e.type() == String && GEO2DNAME == e.valuestr() ){ - uassert( 13022 , "can't have 2 geo field" , _geo.size() == 0 ); - uassert( 13023 , "2d has to be first in index" , _other.size() == 0 ); - _geo = e.fieldName(); - } - else { - _other.push_back( e.fieldName() ); - } - orderBuilder.append( "" , 1 ); - } - - uassert( 13024 , "no geo field specified" , _geo.size() ); - - _bits = _configval( spec , "bits" , 26 ); // for lat/long, ~ 1ft - - uassert( 13028 , "can't have more than 32 bits in geo index" , _bits <= 32 ); - - _max = _configval( spec , "max" , 180 ); - _min = _configval( spec , "min" , -180 ); - - _scaling = (1024*1024*1024*4.0)/(_max-_min); - - _order = orderBuilder.obj(); - } - - int _configval( const IndexSpec* spec , const string& name , int def ){ - BSONElement e = spec->info[name]; - if ( e.isNumber() ) - return e.numberInt(); - return def; - } - - ~Geo2dType(){ - - } - - virtual BSONObj fixKey( const BSONObj& in ) { - if ( in.firstElement().type() == BinData ) - return in; - - BSONObjBuilder b(in.objsize()+16); - - if ( in.firstElement().isABSONObj() ) - _hash( in.firstElement().embeddedObject() ).append( b , "" ); - else if ( in.firstElement().type() == String ) - GeoHash( in.firstElement().valuestr() ).append( b , "" ); - else if ( in.firstElement().type() == RegEx ) - GeoHash( in.firstElement().regex() ).append( b , "" ); - else - return in; - - BSONObjIterator i(in); - i.next(); - while ( i.more() ) - b.append( i.next() ); - return b.obj(); - } - - virtual void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - BSONElement geo = 
obj.getFieldDotted(_geo.c_str()); - if ( geo.eoo() ) - return; - - BSONObjBuilder b(64); - - if ( ! geo.isABSONObj() ) - return; - - BSONObj embed = geo.embeddedObject(); - if ( embed.isEmpty() ) - return; - - _hash( embed ).append( b , "" ); - - for ( size_t i=0; i<_other.size(); i++ ){ - BSONElement e = obj[_other[i]]; - if ( e.eoo() ) - e = _spec->missingField(); - b.appendAs( e , "" ); - } - keys.insert( b.obj() ); - } - - GeoHash _tohash( const BSONElement& e ) const { - if ( e.isABSONObj() ) - return _hash( e.embeddedObject() ); - - return GeoHash( e , _bits ); - } - - GeoHash _hash( const BSONObj& o ) const { - BSONObjIterator i(o); - uassert( 13067 , "geo field is empty" , i.more() ); - BSONElement x = i.next(); - uassert( 13068 , "geo field only has 1 element" , i.more() ); - BSONElement y = i.next(); - - uassert( 13026 , "geo values have to be numbers" , x.isNumber() && y.isNumber() ); - - return _hash( x.number() , y.number() ); - } - - GeoHash _hash( double x , double y ) const { - return GeoHash( _convert(x), _convert(y) , _bits ); - } - - BSONObj _unhash( const GeoHash& h ) const { - unsigned x , y; - h.unhash( x , y ); - BSONObjBuilder b; - b.append( "x" , _unconvert( x ) ); - b.append( "y" , _unconvert( y ) ); - return b.obj(); - } - - unsigned _convert( double in ) const { - uassert( 13027 , "point not in range" , in <= _max && in >= _min ); - in -= _min; - assert( in > 0 ); - return (unsigned)(in * _scaling); - } - - double _unconvert( unsigned in ) const { - double x = in; - x /= _scaling; - x += _min; - return x; - } - - void _unconvert( const GeoHash& h , double& x , double& y ) const { - unsigned a,b; - h.unhash(a,b); - x = _unconvert( a ); - y = _unconvert( b ); - } - - double distance( const GeoHash& a , const GeoHash& b ) const { - double ax,ay,bx,by; - _unconvert( a , ax , ay ); - _unconvert( b , bx , by ); - - double dx = bx - ax; - double dy = by - ay; - - return sqrt( ( dx * dx ) + ( dy * dy ) ); - } - - double size( const GeoHash& a ) const { - GeoHash b = a; - b.move( 1 , 1 ); - return distance( a , b ); - } - - const IndexDetails* getDetails() const { - return _spec->getDetails(); - } - - virtual auto_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const; - - virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const { - BSONElement e = query.getFieldDotted(_geo.c_str()); - switch ( e.type() ){ - case Object: { - BSONObj sub = e.embeddedObject(); - switch ( sub.firstElement().getGtLtOp() ){ - case BSONObj::opNEAR: - case BSONObj::opWITHIN: - return OPTIMAL; - default:; - } - } - case Array: - return HELPFUL; - default: - return USELESS; - } - } - - string _geo; - vector _other; - - unsigned _bits; - int _max; - int _min; - double _scaling; - - BSONObj _order; - }; - - class Point { - public: - - Point( const Geo2dType * g , const GeoHash& hash ){ - g->_unconvert( hash , _x , _y ); - } - - Point( double x , double y ) - : _x( x ) , _y( y ){ - } - - Point() : _x(0),_y(0){ - } - - GeoHash hash( const Geo2dType * g ){ - return g->_hash( _x , _y ); - } - - string toString() const { - StringBuilder buf(32); - buf << "(" << _x << "," << _y << ")"; - return buf.str(); - - } - - double _x; - double _y; - }; - - class Box { - public: - - Box( const Geo2dType * g , const GeoHash& hash ) - : _min( g , hash ) , - _max( _min._x + g->size( hash ) , _min._y + g->size( hash ) ){ - } - - Box( double x , double y , double size ) - : _min( x , y ) , - _max( x + size , y + size ){ - } - - Box( Point min , Point max ) - 
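/* Aside: distance() above is plain Euclidean distance in the unquantized
   plane -- this version of the 2d index applies no spherical correction --
   and size(h) is distance(h, h.move(1,1)), i.e. the diagonal of one grid
   cell (sqrt(2) times the edge), which makes boxes built from it conservative.

       #include <cmath>
       #include <cstdio>

       int main() {
           double ax = 1 , ay = 1 , bx = 4 , by = 5;
           double dx = bx - ax , dy = by - ay;
           printf( "%g\n" , sqrt( dx * dx + dy * dy ) ); // 5: the 3-4-5 case,
           return 0;                                     // also in GeoUnitTest
       }
*/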
: _min( min ) , _max( max ){ - } - - Box(){} - - string toString() const { - StringBuilder buf(64); - buf << _min.toString() << " -->> " << _max.toString(); - return buf.str(); - } - - operator string() const { - return toString(); - } - - bool between( double min , double max , double val , double fudge=0) const { - return val + fudge >= min && val <= max + fudge; - } - - bool mid( double amin , double amax , double bmin , double bmax , bool min , double& res ) const { - assert( amin < amax ); - assert( bmin < bmax ); - - if ( amin < bmin ){ - if ( amax < bmin ) - return false; - res = min ? bmin : amax; - return true; - } - if ( amin > bmax ) - return false; - res = min ? amin : bmax; - return true; - } - - double intersects( const Box& other ) const { - - Point boundMin(0,0); - Point boundMax(0,0); - - if ( mid( _min._x , _max._x , other._min._x , other._max._x , true , boundMin._x ) == false || - mid( _min._x , _max._x , other._min._x , other._max._x , false , boundMax._x ) == false || - mid( _min._y , _max._y , other._min._y , other._max._y , true , boundMin._y ) == false || - mid( _min._y , _max._y , other._min._y , other._max._y , false , boundMax._y ) == false ) - return 0; - - Box intersection( boundMin , boundMax ); - - return intersection.area() / ( ( area() + other.area() ) / 2 ); - } - - double area() const { - return ( _max._x - _min._x ) * ( _max._y - _min._y ); - } - - Point center() const { - return Point( ( _min._x + _max._x ) / 2 , - ( _min._y + _max._y ) / 2 ); - } - - bool inside( Point p , double fudge = 0 ){ - bool res = inside( p._x , p._y , fudge ); - //cout << "is : " << p.toString() << " in " << toString() << " = " << res << endl; - return res; - } - - bool inside( double x , double y , double fudge = 0 ){ - return - between( _min._x , _max._x , x , fudge ) && - between( _min._y , _max._y , y , fudge ); - } - - Point _min; - Point _max; - }; - - class Geo2dPlugin : public IndexPlugin { - public: - Geo2dPlugin() : IndexPlugin( GEO2DNAME ){ - } - - virtual IndexType* generate( const IndexSpec* spec ) const { - return new Geo2dType( this , spec ); - } - } geo2dplugin; - - struct GeoUnitTest : public UnitTest { - - int round( double d ){ - return (int)(.5+(d*1000)); - } - -#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == b ); } - - void run(){ - assert( ! GeoHash::isBitSet( 0 , 0 ) ); - assert( ! 
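/* Aside: Box::intersects() above is not a boolean -- it returns the
   intersection area divided by the mean of the two box areas (0 when
   disjoint), which doBox() later compares against a 0.5 threshold to decide
   whether to recurse into sub-quadrants. Standalone model:

       #include <cstdio>

       struct B {
           double x0, y0, x1, y1;
           double area() const { return ( x1 - x0 ) * ( y1 - y0 ); }
       };

       double intersectFrac( const B& a , const B& b ) {
           double ix0 = a.x0 > b.x0 ? a.x0 : b.x0 , iy0 = a.y0 > b.y0 ? a.y0 : b.y0;
           double ix1 = a.x1 < b.x1 ? a.x1 : b.x1 , iy1 = a.y1 < b.y1 ? a.y1 : b.y1;
           if ( ix0 >= ix1 || iy0 >= iy1 ) return 0;
           B i = { ix0 , iy0 , ix1 , iy1 };
           return i.area() / ( ( a.area() + b.area() ) / 2 );
       }

       int main() {
           B a = { 0 , 0 , 2 , 2 } , b = { 1 , 1 , 3 , 3 };
           printf( "%g\n" , intersectFrac( a , b ) ); // 0.25
           return 0;
       }
*/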
GeoHash::isBitSet( 0 , 31 ) ); - assert( GeoHash::isBitSet( 1 , 31 ) ); - - IndexSpec i( BSON( "loc" << "2d" ) ); - Geo2dType g( &geo2dplugin , &i ); - { - double x = 73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - double x = -73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - GeoHash h( "0000" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0001" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0000" ); - - h.init( "0001" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0100" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0001" ); - - - h.init( "0000" ); - h.move( 1 , 0 ); - GEOHEQ( h , "0010" ); - } - - { - Box b( 5 , 5 , 2 ); - assert( "(5,5) -->> (7,7)" == b.toString() ); - } - - { - GeoHash a = g._hash( 1 , 1 ); - GeoHash b = g._hash( 4 , 5 ); - assert( 5 == (int)(g.distance( a , b ) ) ); - a = g._hash( 50 , 50 ); - b = g._hash( 42 , 44 ); - assert( round(10) == round(g.distance( a , b )) ); - } - - { - GeoHash x("0000"); - assert( 0 == x.getHash() ); - x.init( 0 , 1 , 32 ); - GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) - - assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); - assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); - } - - { - GeoHash x("1010"); - GEOHEQ( x , "1010" ); - GeoHash y = x + "01"; - GEOHEQ( y , "101001" ); - } - - { - - GeoHash a = g._hash( 5 , 5 ); - GeoHash b = g._hash( 5 , 7 ); - GeoHash c = g._hash( 100 , 100 ); - /* - cout << "a: " << a << endl; - cout << "b: " << b << endl; - cout << "c: " << c << endl; - - cout << "a: " << a.toStringHex1() << endl; - cout << "b: " << b.toStringHex1() << endl; - cout << "c: " << c.toStringHex1() << endl; - */ - BSONObj oa = a.wrap(); - BSONObj ob = b.wrap(); - BSONObj oc = c.wrap(); - /* - cout << "a: " << oa.hexDump() << endl; - cout << "b: " << ob.hexDump() << endl; - cout << "c: " << oc.hexDump() << endl; - */ - assert( oa.woCompare( ob ) < 0 ); - assert( oa.woCompare( oc ) < 0 ); - - } - - { - GeoHash x( "000000" ); - x.move( -1 , 0 ); - GEOHEQ( x , "101010" ); - x.move( 1 , -1 ); - GEOHEQ( x , "010101" ); - x.move( 0 , 1 ); - GEOHEQ( x , "000000" ); - } - - { - GeoHash prefix( "110011000000" ); - GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); - assert( ! entry.hasPrefix( prefix ) ); - - entry = "1100110000001100000111000001110000011100000111000001000000000000"; - assert( entry.toString().find( prefix.toString() ) == 0 ); - assert( entry.hasPrefix( GeoHash( "1100" ) ) ); - assert( entry.hasPrefix( prefix ) ); - } - - { - GeoHash a = g._hash( 50 , 50 ); - GeoHash b = g._hash( 48 , 54 ); - assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); - } - - - { - Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); - assert( b.inside( 29.763 , -95.363 ) ); - assert( ! b.inside( 32.9570255 , -96.1082497 ) ); - assert( ! 
b.inside( 32.9570255 , -96.1082497 , .01 ) ); - } - - { - GeoHash a( "11001111" ); - assert( GeoHash( "11" ) == a.commonPrefix( "11" ) ); - assert( GeoHash( "11" ) == a.commonPrefix( "11110000" ) ); - } - - } - } geoUnitTest; - - class GeoPoint { - public: - GeoPoint(){ - } - - GeoPoint( const KeyNode& node , double distance ) - : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _distance( distance ){ - } - - GeoPoint( const BSONObj& key , DiskLoc loc , double distance ) - : _key(key) , _loc(loc) , _o( loc.obj() ) , _distance( distance ){ - } - - bool operator<( const GeoPoint& other ) const { - return _distance < other._distance; - } - - bool isEmpty() const { - return _o.isEmpty(); - } - - BSONObj _key; - DiskLoc _loc; - BSONObj _o; - double _distance; - }; - - class GeoAccumulator { - public: - GeoAccumulator( const Geo2dType * g , const BSONObj& filter ) - : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) { - if ( ! filter.isEmpty() ){ - _matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) ); - } - } - - virtual ~GeoAccumulator(){ - } - - virtual void add( const KeyNode& node ){ - // when looking at other boxes, don't want to look at some object twice - if ( _seen.count( node.recordLoc ) ){ - GEODEBUG( "\t\t\t\t already seen : " << node.recordLoc.obj()["_id"] ); - return; - } - _seen.insert( node.recordLoc ); - _lookedAt++; - - // distance check - double d = 0; - if ( ! checkDistance( GeoHash( node.key.firstElement() ) , d ) ){ - GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d ); - return; - } - - // matcher - MatchDetails details; - if ( _matcher.get() ){ - bool good = _matcher->matches( node.key , node.recordLoc , &details ); - if ( details.loadedObject ) - _objectsLoaded++; - - if ( ! good ){ - GEODEBUG( "\t\t\t\t didn't match : " << node.recordLoc.obj()["_id"] ); - return; - } - } - - if ( ! 
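/* Aside: because neighboring grid cells are scanned independently, the same
   record can be encountered more than once; the _seen set above makes add()
   idempotent per record. Minimal model (long stands in for DiskLoc):

       #include <cstdio>
       #include <set>

       int main() {
           std::set<long> seen;
           long visits[] = { 10 , 20 , 10 , 30 , 20 };
           int lookedAt = 0;
           for ( int i = 0; i < 5; i++ ) {
               if ( ! seen.insert( visits[i] ).second ) continue; // duplicate
               lookedAt++;
           }
           printf( "%d\n" , lookedAt ); // 3
           return 0;
       }
*/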
details.loadedObject ) // dont double count - _objectsLoaded++; - - addSpecific( node , d ); - _found++; - } - - virtual void addSpecific( const KeyNode& node , double d ) = 0; - virtual bool checkDistance( const GeoHash& node , double& d ) = 0; - - long long found() const { - return _found; - } - - const Geo2dType * _g; - set _seen; - auto_ptr _matcher; - - long long _lookedAt; - long long _objectsLoaded; - long long _found; - }; - - class GeoHopper : public GeoAccumulator { - public: - typedef multiset Holder; - - GeoHopper( const Geo2dType * g , unsigned max , const GeoHash& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits::max() ) - : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ) { - - } - - virtual bool checkDistance( const GeoHash& h , double& d ){ - d = _g->distance( _near , h ); - bool good = d < _maxDistance && ( _points.size() < _max || d < farthest() ); - GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near << "\t" << h << "\t" << d - << " ok: " << good << " farthest: " << farthest() ); - return good; - } - - virtual void addSpecific( const KeyNode& node , double d ){ - GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d ); - _points.insert( GeoPoint( node.key , node.recordLoc , d ) ); - if ( _points.size() > _max ){ - _points.erase( --_points.end() ); - } - } - - double farthest(){ - if ( _points.size() == 0 ) - return -1; - - Holder::iterator i = _points.end(); - i--; - return i->_distance; - } - - unsigned _max; - GeoHash _near; - Holder _points; - double _maxDistance; - - }; - - struct BtreeLocation { - int pos; - bool found; - DiskLoc bucket; - - BSONObj key(){ - if ( bucket.isNull() ) - return BSONObj(); - return bucket.btree()->keyNode( pos ).key; - } - - bool hasPrefix( const GeoHash& hash ){ - BSONElement e = key().firstElement(); - if ( e.eoo() ) - return false; - return GeoHash( e ).hasPrefix( hash ); - } - - bool advance( int direction , int& totalFound , GeoAccumulator* all ){ - - if ( bucket.isNull() ) - return false; - bucket = bucket.btree()->advance( bucket , pos , direction , "btreelocation" ); - - return checkCur( totalFound , all ); - } - - bool checkCur( int& totalFound , GeoAccumulator* all ){ - if ( bucket.isNull() ) - return false; - - if ( bucket.btree()->isUsed(pos) ){ - totalFound++; - all->add( bucket.btree()->keyNode( pos ) ); - } - else { - GEODEBUG( "\t\t\t\t not used: " << key() ); - } - - return true; - } - - string toString(){ - stringstream ss; - ss << "bucket: " << bucket.toString() << " pos: " << pos << " found: " << found; - return ss.str(); - } - - static bool initial( const IndexDetails& id , const Geo2dType * spec , - BtreeLocation& min , BtreeLocation& max , - GeoHash start , - int & found , GeoAccumulator * hopper ){ - - min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , - spec->_order , min.pos , min.found , minDiskLoc ); - min.checkCur( found , hopper ); - max = min; - - if ( min.bucket.isNull() ){ - min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , - spec->_order , min.pos , min.found , minDiskLoc , -1 ); - min.checkCur( found , hopper ); - } - - return ! min.bucket.isNull() || ! 
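/* Aside: GeoHopper above is a bounded best-k set: a multiset ordered by
   distance holds at most _max points, checkDistance() rejects anything
   farther than the current worst once full, and addSpecific() evicts the
   new farthest after each insert. Standalone model (double ~ GeoPoint):

       #include <cstdio>
       #include <set>

       int main() {
           const size_t mx = 3;
           std::multiset<double> pts;
           double d[] = { 5 , 2 , 9 , 1 , 4 };
           for ( int i = 0; i < 5; i++ ) {
               if ( pts.size() >= mx && d[i] >= *pts.rbegin() ) continue;
               pts.insert( d[i] );
               if ( pts.size() > mx ) pts.erase( --pts.end() ); // drop farthest
           }
           printf( "%g\n" , *pts.rbegin() ); // 4
           return 0;
       }
*/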
max.bucket.isNull(); - } - }; - - class GeoSearch { - public: - GeoSearch( const Geo2dType * g , const GeoHash& n , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits::max() ) - : _spec( g ) , _n( n ) , _start( n ) , - _numWanted( numWanted ) , _filter( filter ) , _maxDistance( maxDistance ) , - _hopper( new GeoHopper( g , numWanted , n , filter , maxDistance ) ) - { - assert( g->getDetails() ); - _nscanned = 0; - _found = 0; - } - - void exec(){ - const IndexDetails& id = *_spec->getDetails(); - - BtreeBucket * head = id.head.btree(); - assert( head ); - /* - * Search algorithm - * 1) use geohash prefix to find X items - * 2) compute max distance from want to an item - * 3) find optimal set of boxes that complete circle - * 4) use regular btree cursors to scan those boxes - */ - - GeoHopper * hopper = _hopper.get(); - - _prefix = _start; - { // 1 regular geo hash algorithm - - - BtreeLocation min,max; - if ( ! BtreeLocation::initial( id , _spec , min , max , _n , _found , hopper ) ) - return; - - while ( _hopper->found() < _numWanted ){ - GEODEBUG( _prefix << "\t" << _found << "\t DESC" ); - while ( min.hasPrefix( _prefix ) && min.advance( -1 , _found , hopper ) ) - _nscanned++; - GEODEBUG( _prefix << "\t" << _found << "\t ASC" ); - while ( max.hasPrefix( _prefix ) && max.advance( 1 , _found , hopper ) ) - _nscanned++; - if ( ! _prefix.constrains() ) - break; - _prefix = _prefix.up(); - - double temp = _spec->distance( _prefix , _start ); - if ( temp > ( _maxDistance * 2 ) ) - break; - } - } - GEODEBUG( "done part 1" ); - if ( _found && _prefix.constrains() ){ - // 2 - Point center( _spec , _n ); - double boxSize = _spec->size( _prefix ); - double farthest = hopper->farthest(); - if ( farthest > boxSize ) - boxSize = farthest; - Box want( center._x - ( boxSize / 2 ) , center._y - ( boxSize / 2 ) , boxSize ); - while ( _spec->size( _prefix ) < boxSize ) - _prefix = _prefix.up(); - log(1) << "want: " << want << " found:" << _found << " hash size:" << _spec->size( _prefix ) << endl; - - for ( int x=-1; x<=1; x++ ){ - for ( int y=-1; y<=1; y++ ){ - GeoHash toscan = _prefix; - toscan.move( x , y ); - - // 3 & 4 - doBox( id , want , toscan ); - } - } - } - GEODEBUG( "done search" ) - - } - - void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ){ - Box testBox( _spec , toscan ); - if ( logLevel > 0 ) log(1) << "\t doBox: " << testBox << "\t" << toscan.toString() << endl; - - double intPer = testBox.intersects( want ); - - if ( intPer <= 0 ) - return; - - if ( intPer < .5 && depth < 3 ){ - doBox( id , want , toscan + "00" , depth + 1); - doBox( id , want , toscan + "01" , depth + 1); - doBox( id , want , toscan + "10" , depth + 1); - doBox( id , want , toscan + "11" , depth + 1); - return; - } - - BtreeLocation loc; - loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , _spec->_order , - loc.pos , loc.found , minDiskLoc ); - loc.checkCur( _found , _hopper.get() ); - while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ) - _nscanned++; - - } - - - const Geo2dType * _spec; - - GeoHash _n; - GeoHash _start; - GeoHash _prefix; - int _numWanted; - BSONObj _filter; - double _maxDistance; - shared_ptr _hopper; - - long long _nscanned; - int _found; - }; - - class GeoCursorBase : public Cursor { - public: - GeoCursorBase( const Geo2dType * spec ) - : _spec( spec ), _id( _spec->getDetails() ){ - - } - - virtual DiskLoc refLoc(){ return DiskLoc(); } - - virtual BSONObj indexKeyPattern() { - 
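/* Aside: step 1 of GeoSearch::exec() in miniature -- scan under the current
   prefix, then pop up one quadtree level at a time; GeoHash::up() drops one
   x-bit and one y-bit, so every step quadruples the cell area. The loop
   stops once enough points are found or the cell spans 2 * maxDistance.

       #include <cstdio>
       #include <string>

       int main() {
           std::string prefix = "110011000000"; // 6 bit-pairs deep
           while ( prefix.size() ) {
               printf( "scan keys prefixed by %s\n" , prefix.c_str() );
               prefix.erase( prefix.size() - 2 ); // ~ GeoHash::up()
           }
           // steps 2-4: size a box from the farthest hit found so far and
           // sweep the 3x3 neighborhood of the final prefix via doBox().
           return 0;
       }
*/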
return _spec->keyPattern(); - } - - virtual void noteLocation() { - assert(0); - } - - /* called before query getmore block is iterated */ - virtual void checkLocation() { - assert(0); - } - - virtual bool supportGetMore() { return false; } - - virtual bool getsetdup(DiskLoc loc){ - return false; - } - - const Geo2dType * _spec; - const IndexDetails * _id; - }; - - class GeoSearchCursor : public GeoCursorBase { - public: - GeoSearchCursor( shared_ptr s ) - : GeoCursorBase( s->_spec ) , - _s( s ) , _cur( s->_hopper->_points.begin() ) , _end( s->_hopper->_points.end() ) { - } - - virtual ~GeoSearchCursor() {} - - virtual bool ok(){ - return _cur != _end; - } - - virtual Record* _current(){ assert(ok()); return _cur->_loc.rec(); } - virtual BSONObj current(){ assert(ok()); return _cur->_o; } - virtual DiskLoc currLoc(){ assert(ok()); return _cur->_loc; } - virtual bool advance(){ _cur++; return ok(); } - virtual BSONObj currKey() const { return _cur->_key; } - - virtual string toString() { - return "GeoSearchCursor"; - } - - - virtual BSONObj prettyStartKey() const { - return BSON( _s->_spec->_geo << _s->_prefix.toString() ); - } - virtual BSONObj prettyEndKey() const { - GeoHash temp = _s->_prefix; - temp.move( 1 , 1 ); - return BSON( _s->_spec->_geo << temp.toString() ); - } - - - shared_ptr _s; - GeoHopper::Holder::iterator _cur; - GeoHopper::Holder::iterator _end; - }; - - class GeoBrowse : public GeoCursorBase , public GeoAccumulator { - public: - GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() ) - : GeoCursorBase( g ) ,GeoAccumulator( g , filter ) , - _type( type ) , _filter( filter ) , _firstCall(true) { - } - - virtual string toString() { - return (string)"GeoBrowse-" + _type; - } - - virtual bool ok(){ - if ( _firstCall ){ - fillStack(); - _firstCall = false; - } - if ( ! _cur.isEmpty() || _stack.size() ) - return true; - - while ( moreToDo() ){ - fillStack(); - if ( ! _cur.isEmpty() ) - return true; - } - - return false; - } - - virtual bool advance(){ - _cur._o = BSONObj(); - - if ( _stack.size() ){ - _cur = _stack.front(); - _stack.pop_front(); - return true; - } - - if ( ! moreToDo() ) - return false; - - while ( _cur.isEmpty() && moreToDo() ) - fillStack(); - return ! 
_cur.isEmpty(); - } - - virtual Record* _current(){ assert(ok()); return _cur._loc.rec(); } - virtual BSONObj current(){ assert(ok()); return _cur._o; } - virtual DiskLoc currLoc(){ assert(ok()); return _cur._loc; } - virtual BSONObj currKey() const { return _cur._key; } - - - virtual bool moreToDo() = 0; - virtual void fillStack() = 0; - - virtual void addSpecific( const KeyNode& node , double d ){ - if ( _cur.isEmpty() ) - _cur = GeoPoint( node , d ); - else - _stack.push_back( GeoPoint( node , d ) ); - } - - string _type; - BSONObj _filter; - list _stack; - - GeoPoint _cur; - bool _firstCall; - - }; - - class GeoCircleBrowse : public GeoBrowse { - public: - - enum State { - START , - DOING_EXPAND , - DOING_AROUND , - DONE - } _state; - - GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "circle" , filter ){ - - uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 ); - BSONObjIterator i(circle); - _start = g->_tohash( i.next() ); - _prefix = _start; - _maxDistance = i.next().numberDouble(); - uassert( 13061 , "need a max distance > 0 " , _maxDistance > 0 ); - - _state = START; - _found = 0; - - ok(); - } - - virtual bool moreToDo(){ - return _state != DONE; - } - - virtual void fillStack(){ - if ( _state == START ){ - if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , - _prefix , _found , this ) ){ - _state = DONE; - return; - } - _state = DOING_EXPAND; - } - - if ( _state == DOING_EXPAND ){ - GEODEBUG( "circle prefix [" << _prefix << "]" ); - while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); - while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); - - if ( ! _prefix.constrains() ){ - GEODEBUG( "\t exhausted the btree" ); - _state = DONE; - return; - } - - if ( _g->distance( _prefix , _start ) > _maxDistance ){ - GEODEBUG( "\tpast circle bounds" ); - GeoHash tr = _prefix; - tr.move( 1 , 1 ); - if ( _g->distance( tr , _start ) > _maxDistance ) - _state = DOING_AROUND; - else - _prefix = _prefix.up(); - } - else - _prefix = _prefix.up(); - return; - } - - if ( _state == DOING_AROUND ){ - _state = DONE; - return; - } - } - - virtual bool checkDistance( const GeoHash& h , double& d ){ - d = _g->distance( _start , h ); - GEODEBUG( "\t " << h << "\t" << d ); - return d <= ( _maxDistance + .01 ); - } - - GeoHash _start; - double _maxDistance; - - int _found; - - GeoHash _prefix; - BtreeLocation _min; - BtreeLocation _max; - - }; - - class GeoBoxBrowse : public GeoBrowse { - public: - - enum State { - START , - DOING_EXPAND , - DONE - } _state; - - GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "box" , filter ){ - - uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 ); - BSONObjIterator i(box); - _bl = g->_tohash( i.next() ); - _tr = g->_tohash( i.next() ); - - _want._min = Point( _g , _bl ); - _want._max = Point( _g , _tr ); - - uassert( 13064 , "need an area > 0 " , _want.area() > 0 ); - - _state = START; - _found = 0; - - Point center = _want.center(); - _prefix = _g->_hash( center._x , center._y ); - - GEODEBUG( "center : " << center.toString() << "\t" << _prefix ); - - { - GeoHash a(0LL,32); - GeoHash b(0LL,32); - b.move(1,1); - _fudge = _g->distance(a,b); - } - - ok(); - } - - virtual bool moreToDo(){ - return _state != DONE; - } - - virtual void fillStack(){ - if ( _state == START ){ - - if ( ! 
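/* Aside: a query that ends up in GeoCircleBrowse above, built with the BSON()
   macro (BSON_ARRAY assumed available from the same headers) -- everything
   within distance 3 of (5,5):

       BSONObj q = BSON( "loc" << BSON( "$within" <<
                         BSON( "$center" << BSON_ARRAY( BSON_ARRAY( 5 << 5 ) << 3 ) ) ) );

   $center must carry exactly two fields, the middle point and the maximum
   distance (uassert 13060), and checkDistance() pads the radius by .01 to
   absorb quantization error at cell boundaries.
*/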
BtreeLocation::initial( *_id , _spec , _min , _max , - _prefix , _found , this ) ){ - _state = DONE; - return; - } - _state = DOING_EXPAND; - } - - if ( _state == DOING_EXPAND ){ - int started = _found; - while ( started == _found || _state == DONE ){ - GEODEBUG( "box prefix [" << _prefix << "]" ); - while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); - while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); - - if ( _state == DONE ) - return; - - if ( ! _prefix.constrains() ){ - GEODEBUG( "box exhausted" ); - _state = DONE; - return; - } - - Box cur( _g , _prefix ); - if ( cur._min._x + _fudge < _want._min._x && - cur._min._y + _fudge < _want._min._y && - cur._max._x - _fudge > _want._max._x && - cur._max._y - _fudge > _want._max._y ){ - - _state = DONE; - GeoHash temp = _prefix.commonPrefix( cur._max.hash( _g ) ); - - GEODEBUG( "box done : " << cur.toString() << " prefix:" << _prefix << " common:" << temp ); - - if ( temp == _prefix ) - return; - _prefix = temp; - GEODEBUG( "\t one more loop" ); - continue; - } - else { - _prefix = _prefix.up(); - } - } - return; - } - - } - - virtual bool checkDistance( const GeoHash& h , double& d ){ - bool res = _want.inside( Point( _g , h ) , _fudge ); - GEODEBUG( "\t want : " << _want.toString() - << " point: " << Point( _g , h ).toString() - << " in : " << res ); - return res; - } - - GeoHash _bl; - GeoHash _tr; - Box _want; - - int _found; - - GeoHash _prefix; - BtreeLocation _min; - BtreeLocation _max; - - double _fudge; - }; - - - auto_ptr Geo2dType::newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const { - if ( numWanted < 0 ) - numWanted = numWanted * -1; - else if ( numWanted == 0 ) - numWanted = 100; - - BSONObjIterator i(query); - while ( i.more() ){ - BSONElement e = i.next(); - - if ( _geo != e.fieldName() ) - continue; - - if ( e.type() != Object ) - continue; - - switch ( e.embeddedObject().firstElement().getGtLtOp() ){ - case BSONObj::opNEAR: { - e = e.embeddedObject().firstElement(); - double maxDistance = numeric_limits::max(); - if ( e.isABSONObj() && e.embeddedObject().nFields() > 2 ){ - BSONObjIterator i(e.embeddedObject()); - i.next(); - i.next(); - BSONElement e = i.next(); - if ( e.isNumber() ) - maxDistance = e.numberDouble(); - } - shared_ptr s( new GeoSearch( this , _tohash(e) , numWanted , query , maxDistance ) ); - s->exec(); - auto_ptr c; - c.reset( new GeoSearchCursor( s ) ); - return c; - } - case BSONObj::opWITHIN: { - e = e.embeddedObject().firstElement(); - uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() ); - e = e.embeddedObject().firstElement(); - string type = e.fieldName(); - if ( type == "$center" ){ - uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() ); - auto_ptr c; - c.reset( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query ) ); - return c; - } - else if ( type == "$box" ){ - uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() ); - auto_ptr c; - c.reset( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); - return c; - } - throw UserException( 13058 , (string)"unknown $with type: " + type ); - } - default: - break; - } - } - - throw UserException( 13042 , (string)"missing geo field (" + _geo + ") in : " + query.toString() ); - } - - // ------ - // commands - // ------ - - class Geo2dFindNearCmd : public Command { - public: - Geo2dFindNearCmd() : Command( "geoNear" ){} - virtual LockType locktype(){ return READ; } - bool slaveOk() { return true; 
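/* Aside: the matching $near form dispatched by newCursor() above; a third
   array element, when present, is picked up as maxDistance by the
   nFields() > 2 branch (BSON_ARRAY assumed as before):

       BSONObj q = BSON( "loc" << BSON( "$near" << BSON_ARRAY( 50 << 50 << 5 ) ) );

   numWanted is normalized first: a negative value is flipped positive and 0
   falls back to 100 results.
*/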
} - bool slaveOverrideOk() { return true; } - bool run(const char * stupidns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - string ns = nsToDatabase( stupidns ) + "." + cmdObj.firstElement().valuestr(); - - NamespaceDetails * d = nsdetails( ns.c_str() ); - if ( ! d ){ - errmsg = "can't find ns"; - return false; - } - - int geoIdx = -1; - { - NamespaceDetails::IndexIterator ii = d->ii(); - while ( ii.more() ){ - IndexDetails& id = ii.next(); - if ( id.getSpec().getTypeName() == GEO2DNAME ){ - if ( geoIdx >= 0 ){ - errmsg = "2 geo indexes :("; - return false; - } - geoIdx = ii.pos() - 1; - } - } - } - - if ( geoIdx < 0 ){ - errmsg = "no geo index :("; - return false; - } - - result.append( "ns" , ns ); - - IndexDetails& id = d->idx( geoIdx ); - Geo2dType * g = (Geo2dType*)id.getSpec().getType(); - assert( &id == g->getDetails() ); - - int numWanted = 100; - if ( cmdObj["num"].isNumber() ) - numWanted = cmdObj["num"].numberInt(); - - uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo()); - const GeoHash n = g->_tohash( cmdObj["near"] ); - result.append( "near" , n.toString() ); - - BSONObj filter; - if ( cmdObj["query"].type() == Object ) - filter = cmdObj["query"].embeddedObject(); - - double maxDistance = numeric_limits::max(); - if ( cmdObj["maxDistance"].isNumber() ) - maxDistance = cmdObj["maxDistance"].number(); - - GeoSearch gs( g , n , numWanted , filter , maxDistance ); - - if ( cmdObj["start"].type() == String){ - GeoHash start = (string) cmdObj["start"].valuestr(); - gs._start = start; - } - - gs.exec(); - - double distanceMultiplier = 1; - if ( cmdObj["distanceMultiplier"].isNumber() ) - distanceMultiplier = cmdObj["distanceMultiplier"].number(); - - double totalDistance = 0; - - - BSONObjBuilder arr( result.subarrayStart( "results" ) ); - int x = 0; - for ( GeoHopper::Holder::iterator i=gs._hopper->_points.begin(); i!=gs._hopper->_points.end(); i++ ){ - const GeoPoint& p = *i; - - double dis = distanceMultiplier * p._distance; - totalDistance += dis; - - BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ).c_str() ) ); - bb.append( "dis" , dis ); - bb.append( "obj" , p._o ); - bb.done(); - } - arr.done(); - - BSONObjBuilder stats( result.subobjStart( "stats" ) ); - stats.append( "time" , cc().curop()->elapsedMillis() ); - stats.appendNumber( "btreelocs" , gs._nscanned ); - stats.appendNumber( "nscanned" , gs._hopper->_lookedAt ); - stats.appendNumber( "objectsLoaded" , gs._hopper->_objectsLoaded ); - stats.append( "avgDistance" , totalDistance / x ); - stats.done(); - - return true; - } - - } geo2dFindNearCmd; - - class GeoWalkCmd : public Command { - public: - GeoWalkCmd() : Command( "geoWalk" ){} - virtual LockType locktype(){ return READ; } - bool slaveOk() { return true; } - bool slaveOverrideOk() { return true; } - bool run(const char * stupidns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - string ns = nsToDatabase( stupidns ) + "." + cmdObj.firstElement().valuestr(); - - NamespaceDetails * d = nsdetails( ns.c_str() ); - if ( ! 
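/* Aside: driving the geoNear command above with a plain command object; only
   "near" is mandatory (uassert 13046), the other fields shown are the
   optional ones run() reads. The collection name "places" is illustrative.

       BSONObj cmd = BSON( "geoNear" << "places"
                        << "near" << BSON_ARRAY( 50 << 50 )
                        << "num" << 10
                        << "maxDistance" << 5
                        << "distanceMultiplier" << 111.12 ); // e.g. degrees -> km

   The reply carries results[i].dis / results[i].obj plus a stats subobject
   (time, btreelocs, nscanned, objectsLoaded, avgDistance).
*/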
d ){ - errmsg = "can't find ns"; - return false; - } - - int geoIdx = -1; - { - NamespaceDetails::IndexIterator ii = d->ii(); - while ( ii.more() ){ - IndexDetails& id = ii.next(); - if ( id.getSpec().getTypeName() == GEO2DNAME ){ - if ( geoIdx >= 0 ){ - errmsg = "2 geo indexes :("; - return false; - } - geoIdx = ii.pos() - 1; - } - } - } - - if ( geoIdx < 0 ){ - errmsg = "no geo index :("; - return false; - } - - - IndexDetails& id = d->idx( geoIdx ); - Geo2dType * g = (Geo2dType*)id.getSpec().getType(); - assert( &id == g->getDetails() ); - - int max = 100000; - - BtreeCursor c( d , geoIdx , id , BSONObj() , BSONObj() , true , 1 ); - while ( c.ok() && max-- ){ - GeoHash h( c.currKey().firstElement() ); - int len; - cout << "\t" << h.toString() - << "\t" << c.current()[g->_geo] - << "\t" << hex << h.getHash() - << "\t" << hex << ((long long*)c.currKey().firstElement().binData(len))[0] - << "\t" << c.current()["_id"] - << endl; - c.advance(); - } - - return true; - } - - } geoWalkCmd; - -} diff -Nru mongodb-1.4.4/db/index.h mongodb-1.6.3/db/index.h --- mongodb-1.4.4/db/index.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/index.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,157 +18,13 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "diskloc.h" #include "jsobj.h" -#include +#include "indexkey.h" namespace mongo { - class IndexSpec; - class IndexType; // TODO: this name sucks - class IndexPlugin; - class IndexDetails; - - enum IndexSuitability { USELESS = 0 , HELPFUL = 1 , OPTIMAL = 2 }; - - /** - * this represents an instance of a index plugin - * done this way so parsing, etc... can be cached - * so if there is a FTS IndexPlugin, for each index using FTS - * there will be 1 of these, and it can have things pre-parsed, etc... - */ - class IndexType : boost::noncopyable { - public: - IndexType( const IndexPlugin * plugin , const IndexSpec * spec ); - virtual ~IndexType(); - - virtual void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const = 0; - virtual auto_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const = 0; - - /** optional op : changes query to match what's in the index */ - virtual BSONObj fixKey( const BSONObj& in ) { return in; } - - /** optional op : compare 2 objects with regards to this index */ - virtual int compare( const BSONObj& l , const BSONObj& r ) const; - - /** @return plugin */ - const IndexPlugin * getPlugin() const { return _plugin; } - - const BSONObj& keyPattern() const; - - virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const ; - - virtual bool scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const ; - - protected: - const IndexPlugin * _plugin; - const IndexSpec * _spec; - }; - - /** - * this represents a plugin - * a plugin could be something like full text search, sparse index, etc... - * 1 of these exists per type of index per server - * 1 IndexType is created per index using this plugin - */ - class IndexPlugin : boost::noncopyable { - public: - IndexPlugin( const string& name ); - virtual ~IndexPlugin(){} - - virtual IndexType* generate( const IndexSpec * spec ) const = 0; - - static IndexPlugin* get( const string& name ){ - if ( ! 
_plugins )
-                return 0;
-            map<string,IndexPlugin*>::iterator i = _plugins->find( name );
-            if ( i == _plugins->end() )
-                return 0;
-            return i->second;
-        }
-
-        string getName() const { return _name; }
-    private:
-        string _name;
-        static map<string,IndexPlugin*> * _plugins;
-    };
-
-    /* precomputed details about an index, used for inserting keys on updates
-       stored/cached in NamespaceDetailsTransient, or can be used standalone
-    */
-    class IndexSpec {
-    public:
-        BSONObj keyPattern; // e.g., { name : 1 }
-        BSONObj info; // this is the same as IndexDetails::info.obj()
-
-        IndexSpec()
-            : _details(0) , _finishedInit(false){
-        }
-
-        IndexSpec( const BSONObj& k , const BSONObj& m = BSONObj() )
-            : keyPattern(k) , info(m) , _details(0) , _finishedInit(false){
-            _init();
-        }
-
-        /**
-           this is a DiscLoc of an IndexDetails info
-           should have a key field
-        */
-        IndexSpec( const DiskLoc& loc ){
-            reset( loc );
-        }
-
-        void reset( const DiskLoc& loc );
-        void reset( const IndexDetails * details );
-
-        void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
-
-        BSONElement missingField() const { return _nullElt; }
-
-        string getTypeName() const {
-            if ( _indexType.get() )
-                return _indexType->getPlugin()->getName();
-            return "";
-        }
-
-        IndexType* getType() const {
-            return _indexType.get();
-        }
-
-        const IndexDetails * getDetails() const {
-            return _details;
-        }
-
-        IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const ;
-
-    protected:
-
-        IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ;
-
-        void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
-
-        BSONSizeTracker _sizeTracker;
-
-        vector<const char*> _fieldNames;
-        vector<BSONElement> _fixed;
-        BSONObj _nullKey;
-
-        BSONObj _nullObj;
-        BSONElement _nullElt;
-
-        shared_ptr<IndexType> _indexType;
-
-        const IndexDetails * _details;
-
-        void _init();
-
-    public:
-        bool _finishedInit;
-
-        friend class IndexType;
-    };
-
     /* Details about a particular index. There is one of these effectively for each object in
        system.namespaces (although this also includes the head pointer, which is not in that
        collection).
@@ -275,7 +131,7 @@
 
         const IndexSpec& getSpec() const;
 
-        operator string() const {
+        string toString() const {
             return info.obj().toString();
         }
     };
@@ -300,6 +156,7 @@
     };
 
     class NamespaceDetails;
-    void getIndexChanges(vector<IndexChanges>& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj);
+    // changedId should be initialized to false
+    void getIndexChanges(vector<IndexChanges>& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj, bool &cangedId);
     void dupCheck(vector<IndexChanges>& v, NamespaceDetails& d, DiskLoc curObjLoc);
 
 } // namespace mongo
diff -Nru mongodb-1.4.4/db/indexkey.cpp mongodb-1.6.3/db/indexkey.cpp
--- mongodb-1.4.4/db/indexkey.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/indexkey.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,238 @@
+// index_key.cpp
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include "namespace.h"
+#include "index.h"
+#include "btree.h"
+#include "query.h"
+#include "background.h"
+
+namespace mongo {
+
+    map<string,IndexPlugin*> * IndexPlugin::_plugins;
+
+    IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec )
+        : _plugin( plugin ) , _spec( spec ){
+
+    }
+
+    IndexType::~IndexType(){
+    }
+
+    const BSONObj& IndexType::keyPattern() const {
+        return _spec->keyPattern;
+    }
+
+    IndexPlugin::IndexPlugin( const string& name )
+        : _name( name ){
+        if ( ! _plugins )
+            _plugins = new map<string,IndexPlugin*>();
+        (*_plugins)[name] = this;
+    }
+
+    int IndexType::compare( const BSONObj& l , const BSONObj& r ) const {
+        return l.woCompare( r , _spec->keyPattern );
+    }
+
+    void IndexSpec::_init(){
+        assert( keyPattern.objsize() );
+
+        string pluginName = "";
+
+        BSONObjIterator i( keyPattern );
+        BSONObjBuilder nullKeyB;
+        while( i.more() ) {
+            BSONElement e = i.next();
+            _fieldNames.push_back( e.fieldName() );
+            _fixed.push_back( BSONElement() );
+            nullKeyB.appendNull( "" );
+            if ( e.type() == String ){
+                uassert( 13007 , "can only have 1 index plugin / bad index key pattern" , pluginName.size() == 0 );
+                pluginName = e.valuestr();
+            }
+
+        }
+
+        _nullKey = nullKeyB.obj();
+
+        BSONObjBuilder b;
+        b.appendNull( "" );
+        _nullObj = b.obj();
+        _nullElt = _nullObj.firstElement();
+
+        if ( pluginName.size() ){
+            IndexPlugin * plugin = IndexPlugin::get( pluginName );
+            if ( ! plugin ){
+                log() << "warning: can't find plugin [" << pluginName << "]" << endl;
+            }
+            else {
+                _indexType.reset( plugin->generate( this ) );
+            }
+        }
+        _finishedInit = true;
+    }
+
+
+    void IndexSpec::getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+        if ( _indexType.get() ){
+            _indexType->getKeys( obj , keys );
+            return;
+        }
+        vector<const char*> fieldNames( _fieldNames );
+        vector<BSONElement> fixed( _fixed );
+        _getKeys( fieldNames , fixed , obj, keys );
+        if ( keys.empty() )
+            keys.insert( _nullKey );
+    }
+
+    void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+        BSONElement arrElt;
+        unsigned arrIdx = ~0;
+        for( unsigned i = 0; i < fieldNames.size(); ++i ) {
+            if ( *fieldNames[ i ] == '\0' )
+                continue;
+            BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
+            if ( e.eoo() )
+                e = _nullElt; // no matching field
+            if ( e.type() != Array )
+                fieldNames[ i ] = ""; // no matching field or non-array match
+            if ( *fieldNames[ i ] == '\0' )
+                fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
+            if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
+                arrIdx = i;
+                arrElt = e;
+            }
+            // enforce single array path here
+            if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ){
+                stringstream ss;
+                ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]";
+                uasserted( 10088 , ss.str() );
+            }
+        }
+
+        bool allFound = true; // have we found elements for all field names in the key spec?
+        for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ){
+            if ( **i != '\0' ){
+                allFound = false;
+                break;
+            }
+        }
+
+        bool insertArrayNull = false;
+
+        if ( allFound ) {
+            if ( arrElt.eoo() ) {
+                // no terminal array element to expand
+                BSONObjBuilder b(_sizeTracker);
+                for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
+                    b.appendAs( *i, "" );
+                keys.insert( b.obj() );
+            }
+            else {
+                // terminal array element to expand, so generate all keys
+                BSONObjIterator i( arrElt.embeddedObject() );
+                if ( i.more() ){
+                    while( i.more() ) {
+                        BSONObjBuilder b(_sizeTracker);
+                        for( unsigned j = 0; j < fixed.size(); ++j ) {
+                            if ( j == arrIdx )
+                                b.appendAs( i.next(), "" );
+                            else
+                                b.appendAs( fixed[ j ], "" );
+                        }
+                        keys.insert( b.obj() );
+                    }
+                }
+                else if ( fixed.size() > 1 ){
+                    insertArrayNull = true;
+                }
+            }
+        } else {
+            // nonterminal array element to expand, so recurse
+            assert( !arrElt.eoo() );
+            BSONObjIterator i( arrElt.embeddedObject() );
+            if ( i.more() ){
+                while( i.more() ) {
+                    BSONElement e = i.next();
+                    if ( e.type() == Object ){
+                        _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
+                    }
+                }
+            }
+            else {
+                insertArrayNull = true;
+            }
+        }
+
+        if ( insertArrayNull ) {
+            // x : [] - need to insert undefined
+            BSONObjBuilder b(_sizeTracker);
+            for( unsigned j = 0; j < fixed.size(); ++j ) {
+                if ( j == arrIdx ){
+                    b.appendUndefined( "" );
+                }
+                else {
+                    BSONElement e = fixed[j];
+                    if ( e.eoo() )
+                        b.appendNull( "" );
+                    else
+                        b.appendAs( e , "" );
+                }
+            }
+            keys.insert( b.obj() );
+        }
+    }
+
+    bool anyElementNamesMatch( const BSONObj& a , const BSONObj& b ){
+        BSONObjIterator x(a);
+        while ( x.more() ){
+            BSONElement e = x.next();
+            BSONObjIterator y(b);
+            while ( y.more() ){
+                BSONElement f = y.next();
+                FieldCompareResult res = compareDottedFieldNames( e.fieldName() , f.fieldName() );
+                if ( res == SAME || res == LEFT_SUBFIELD || res == RIGHT_SUBFIELD )
+                    return true;
+            }
+        }
+        return false;
+    }
+
+    IndexSuitability IndexSpec::suitability( const BSONObj& query , const BSONObj& order ) const {
+        if ( _indexType.get() )
+            return _indexType->suitability( query , order );
+        return _suitability( query , order );
+    }
+
+    IndexSuitability IndexSpec::_suitability( const BSONObj& query , const BSONObj& order ) const {
+        // TODO: optimize
+        if ( anyElementNamesMatch( keyPattern , query ) == 0 && anyElementNamesMatch( keyPattern , order ) == 0 )
+            return USELESS;
+        return HELPFUL;
+    }
+
+    IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const {
+        return _spec->_suitability( query , order );
+    }
+
+    bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const {
+        return ! order.isEmpty();
+    }
+
+}
diff -Nru mongodb-1.4.4/db/indexkey.h mongodb-1.6.3/db/indexkey.h
--- mongodb-1.4.4/db/indexkey.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/indexkey.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,174 @@
+// index_key.h
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "../pch.h"
+#include "diskloc.h"
+#include "jsobj.h"
+#include <map>
+
+namespace mongo {
+
+    class Cursor;
+    class IndexSpec;
+    class IndexType; // TODO: this name sucks
+    class IndexPlugin;
+    class IndexDetails;
+
+    enum IndexSuitability { USELESS = 0 , HELPFUL = 1 , OPTIMAL = 2 };
+
+    /**
+     * this represents an instance of a index plugin
+     * done this way so parsing, etc... can be cached
+     * so if there is a FTS IndexPlugin, for each index using FTS
+     * there will be 1 of these, and it can have things pre-parsed, etc...
+     */
+    class IndexType : boost::noncopyable {
+    public:
+        IndexType( const IndexPlugin * plugin , const IndexSpec * spec );
+        virtual ~IndexType();
+
+        virtual void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const = 0;
+        virtual shared_ptr<Cursor> newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const = 0;
+
+        /** optional op : changes query to match what's in the index */
+        virtual BSONObj fixKey( const BSONObj& in ) { return in; }
+
+        /** optional op : compare 2 objects with regards to this index */
+        virtual int compare( const BSONObj& l , const BSONObj& r ) const;
+
+        /** @return plugin */
+        const IndexPlugin * getPlugin() const { return _plugin; }
+
+        const BSONObj& keyPattern() const;
+
+        virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const ;
+
+        virtual bool scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const ;
+
+    protected:
+        const IndexPlugin * _plugin;
+        const IndexSpec * _spec;
+    };
+
+    /**
+     * this represents a plugin
+     * a plugin could be something like full text search, sparse index, etc...
+     * 1 of these exists per type of index per server
+     * 1 IndexType is created per index using this plugin
+     */
+    class IndexPlugin : boost::noncopyable {
+    public:
+        IndexPlugin( const string& name );
+        virtual ~IndexPlugin(){}
+
+        virtual IndexType* generate( const IndexSpec * spec ) const = 0;
+
+        static IndexPlugin* get( const string& name ){
+            if ( !
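/* Aside: the registry above is populated simply by constructing a static
   IndexPlugin instance, exactly as the 2d index's geo2dplugin does earlier
   in this patch; a hypothetical plugin would follow the same shape:

       class ReversePlugin : public IndexPlugin {        // illustrative only
       public:
           ReversePlugin() : IndexPlugin( "reverse" ) {} // self-registers by name
           virtual IndexType* generate( const IndexSpec* spec ) const {
               return new ReverseType( this , spec );    // your IndexType subclass
           }
       } reversePlugin;

   IndexSpec::_init() then finds it whenever a key pattern contains
   { field : "reverse" }, mirroring { loc : "2d" }.
*/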
_plugins )
+                return 0;
+            map<string,IndexPlugin*>::iterator i = _plugins->find( name );
+            if ( i == _plugins->end() )
+                return 0;
+            return i->second;
+        }
+
+        string getName() const { return _name; }
+    private:
+        string _name;
+        static map<string,IndexPlugin*> * _plugins;
+    };
+
+    /* precomputed details about an index, used for inserting keys on updates
+       stored/cached in NamespaceDetailsTransient, or can be used standalone
+    */
+    class IndexSpec {
+    public:
+        BSONObj keyPattern; // e.g., { name : 1 }
+        BSONObj info; // this is the same as IndexDetails::info.obj()
+
+        IndexSpec()
+            : _details(0) , _finishedInit(false){
+        }
+
+        IndexSpec( const BSONObj& k , const BSONObj& m = BSONObj() )
+            : keyPattern(k) , info(m) , _details(0) , _finishedInit(false){
+            _init();
+        }
+
+        /**
+           this is a DiscLoc of an IndexDetails info
+           should have a key field
+        */
+        IndexSpec( const DiskLoc& loc ){
+            reset( loc );
+        }
+
+        void reset( const DiskLoc& loc );
+        void reset( const IndexDetails * details );
+
+        void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
+
+        BSONElement missingField() const { return _nullElt; }
+
+        string getTypeName() const {
+            if ( _indexType.get() )
+                return _indexType->getPlugin()->getName();
+            return "";
+        }
+
+        IndexType* getType() const {
+            return _indexType.get();
+        }
+
+        const IndexDetails * getDetails() const {
+            return _details;
+        }
+
+        IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const ;
+
+    protected:
+
+        IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ;
+
+        void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
+
+        BSONSizeTracker _sizeTracker;
+
+        vector<const char*> _fieldNames;
+        vector<BSONElement> _fixed;
+        BSONObj _nullKey;
+
+        BSONObj _nullObj;
+        BSONElement _nullElt;
+
+        shared_ptr<IndexType> _indexType;
+
+        const IndexDetails * _details;
+
+        void _init();
+
+    public:
+        bool _finishedInit;
+
+        friend class IndexType;
+    };
+
+
+} // namespace mongo
diff -Nru mongodb-1.4.4/db/instance.cpp mongodb-1.6.3/db/instance.cpp
--- mongodb-1.4.4/db/instance.cpp 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/instance.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -17,7 +17,7 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "query.h" #include "introspect.h" @@ -27,10 +27,11 @@ #include "lasterror.h" #include "security.h" #include "json.h" -#include "reccache.h" -#include "replset.h" +//#include "reccache.h" +#include "replpair.h" #include "../s/d_logic.h" #include "../util/file_allocator.h" +#include "../util/goodies.h" #include "cmdline.h" #if !defined(_WIN32) #include @@ -40,6 +41,9 @@ namespace mongo { + inline void opread(Message& m) { if( _diaglog.level & 2 ) _diaglog.readop((char *) m.singleData(), m.header()->len); } + inline void opwrite(Message& m) { if( _diaglog.level & 1 ) _diaglog.write((char *) m.singleData(), m.header()->len); } + void receivedKillCursors(Message& m); void receivedUpdate(Message& m, CurOp& op); void receivedDelete(Message& m, CurOp& op); @@ -51,16 +55,13 @@ string dbExecCommand; - string bind_ip = ""; - - char *appsrvPath = null; + char *appsrvPath = NULL; DiagLog _diaglog; bool useCursors = true; bool useHints = true; - void closeAllSockets(); void flushOpLog( stringstream &ss ) { if( _diaglog.f && _diaglog.f->is_open() ) { ss << "flushing op log and files\n"; @@ -76,7 +77,7 @@ // see FSyncCommand: unsigned lockedForWriting; - mongo::mutex lockedForWritingMutex; + mongo::mutex lockedForWritingMutex("lockedForWriting"); bool unlockRequested = false; void inProgCmd( Message &m, DbResponse &dbresponse ) { @@ -109,7 +110,7 @@ unsigned x = lockedForWriting; if( x ) { b.append("fsyncLock", x); - b.append("info", "use command {unlock:0} to terminate the fsync write/snapshot lock"); + b.append("info", "use db.$cmd.sys.unlock.findOne() to terminate the fsync write/snapshot lock"); } } @@ -132,6 +133,7 @@ obj = fromjson("{\"err\":\"no op number field specified?\"}"); } else { + log() << "going to kill op: " << e << endl; obj = fromjson("{\"info\":\"attempting to kill op\"}"); killCurrentOp.kill( (unsigned) e.number() ); } @@ -159,70 +161,73 @@ static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ){ bool ok = true; - MSGID responseTo = m.data->id; + MSGID responseTo = m.header()->id; DbMessage d(m); QueryMessage q(d); - QueryResult* msgdata; + auto_ptr< Message > resp( new Message() ); CurOp& op = *(c.curop()); try { - msgdata = runQuery(m, q, op ).release(); + dbresponse.exhaust = runQuery(m, q, op, *resp); + assert( !resp->empty() ); } catch ( AssertionException& e ) { ok = false; op.debug().str << " exception "; - LOGSOME problem() << " Caught Assertion in runQuery ns:" << q.ns << ' ' << e.toString() << '\n'; - log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << '\n'; - if ( q.query.valid() ) - log() << " query:" << q.query.toString() << endl; - else - log() << " query object is not valid!" << endl; + LOGSOME { + log() << "assertion " << e.toString() << " ns:" << q.ns << " query:" << + (q.query.valid() ? q.query.toString() : "query object is corrupt") << endl; + if( q.ntoskip || q.ntoreturn ) + log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << endl; + } BSONObjBuilder err; - err.append("$err", e.msg.empty() ? "assertion during query" : e.msg); + e.getInfo().append( err ); BSONObj errObj = err.done(); BufBuilder b; b.skip(sizeof(QueryResult)); - b.append((void*) errObj.objdata(), errObj.objsize()); + b.appendBuf((void*) errObj.objdata(), errObj.objsize()); // todo: call replyToQuery() from here instead of this!!! 
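/* Aside: _diaglog.level used by the opread/opwrite inlines above is a
   bitmask -- bit value 1 logs writes, bit value 2 logs reads, so level 3
   captures both. Trivial standalone model:

       #include <cstdio>

       int main() {
           for ( int level = 0; level <= 3; level++ )
               printf( "level=%d reads=%d writes=%d\n" ,
                       level , ( level & 2 ) != 0 , ( level & 1 ) != 0 );
           return 0;
       }
*/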
see dbmessage.h - msgdata = (QueryResult *) b.buf(); + QueryResult * msgdata = (QueryResult *) b.buf(); b.decouple(); QueryResult *qr = msgdata; - qr->_resultFlags() = QueryResult::ResultFlag_ErrSet; + qr->_resultFlags() = ResultFlag_ErrSet; + if ( e.getCode() == StaleConfigInContextCode ) + qr->_resultFlags() |= ResultFlag_ShardConfigStale; qr->len = b.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = 1; + resp.reset( new Message() ); + resp->setData( msgdata, true ); + } + if ( op.shouldDBProfile( 0 ) ){ + op.debug().str << " bytes:" << resp->header()->dataLen(); } - Message *resp = new Message(); - resp->setData(msgdata, true); // transport will free - dbresponse.response = resp; + + dbresponse.response = resp.release(); dbresponse.responseTo = responseTo; - if ( op.shouldDBProfile( 0 ) ){ - op.debug().str << " bytes:" << resp->data->dataLen(); - } - return ok; } // Returns false when request includes 'end' - bool assembleResponse( Message &m, DbResponse &dbresponse, const sockaddr_in &client ) { + bool assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client ) { // before we lock... - int op = m.data->operation(); + int op = m.operation(); bool isCommand = false; - const char *ns = m.data->_data + 4; + const char *ns = m.singleData()->_data + 4; if ( op == dbQuery ) { if( strstr(ns, ".$cmd") ) { isCommand = true; - OPWRITE; + opwrite(m); if( strstr(ns, ".$cmd.sys.") ) { if( strstr(ns, "$cmd.sys.inprog") ) { inProgCmd(m, dbresponse); @@ -237,28 +242,20 @@ return true; } } - } else { - OPREAD; + opread(m); } } else if( op == dbGetMore ) { - OPREAD; + opread(m); } else { - OPWRITE; + opwrite(m); } globalOpCounters.gotOp( op , isCommand ); - if ( handlePossibleShardedMessage( m , dbresponse ) ){ - /* important to do this before we lock - so if a message has to be forwarded, doesn't block for that - */ - return true; - } - Client& c = cc(); auto_ptr nestedOp; @@ -278,22 +275,21 @@ bool log = logLevel >= 1; if ( op == dbQuery ) { - if ( ! receivedQuery(c , dbresponse, m ) ) - log = true; + if ( handlePossibleShardedMessage( m , &dbresponse ) ) + return true; + receivedQuery(c , dbresponse, m ); } else if ( op == dbGetMore ) { - DEV log = true; if ( ! receivedGetMore(dbresponse, m, currentOp) ) log = true; } else if ( op == dbMsg ) { // deprecated - replaced by commands - char *p = m.data->_data; + char *p = m.singleData()->_data; int len = strlen(p); if ( len > 400 ) out() << curTimeMillis() % 10000 << - " long msg received, len:" << len << - " ends with: " << p + len - 10 << endl; + " long msg received, len:" << len << endl; Message *resp = new Message(); if ( strcmp( "end" , p ) == 0 ) @@ -302,10 +298,10 @@ resp->setData( opReply , "i am fine - dbMsg deprecated"); dbresponse.response = resp; - dbresponse.responseTo = m.data->id; + dbresponse.responseTo = m.header()->id; } else { - const char *ns = m.data->_data + 4; + const char *ns = m.singleData()->_data + 4; char cl[256]; nsToDatabase(ns, cl); if( ! 
c.getAuthenticationInfo()->isAuthorized(cl) ) { @@ -329,15 +325,16 @@ receivedKillCursors(m); } else { - out() << " operation isn't supported: " << op << endl; + mongo::log() << " operation isn't supported: " << op << endl; currentOp.done(); log = true; } } catch ( AssertionException& e ) { - problem() << " Caught Assertion in " << opToString(op) << " , continuing" << endl; + static int n; + tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing" << endl; ss << " exception " + e.toString(); - log = true; + log = ++n < 10; } } } @@ -346,10 +343,14 @@ int ms = currentOp.totalTimeMillis(); log = log || (logLevel >= 2 && ++ctr % 512 == 0); - DEV log = true; + //DEV log = true; if ( log || ms > logThreshold ) { - ss << ' ' << ms << "ms"; - mongo::log() << ss.str() << endl; + if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && ms < 3000 && !log ) { + /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. */ + } else { + ss << ' ' << ms << "ms"; + mongo::tlog() << ss.str() << endl; + } } if ( currentOp.shouldDBProfile( ms ) ){ @@ -374,12 +375,12 @@ void killCursors(int n, long long *ids); void receivedKillCursors(Message& m) { - int *x = (int *) m.data->_data; + int *x = (int *) m.singleData()->_data; x++; // reserved int n = *x++; uassert( 13004 , "sent 0 cursors to kill" , n >= 1 ); if ( n > 2000 ) { - problem() << "Assertion failure, receivedKillCursors, n=" << n << endl; + log( n < 30000 ? LL_WARNING : LL_ERROR ) << "receivedKillCursors, n=" << n << endl; assert( n < 30000 ); } killCursors(n, (long long *) x); @@ -397,7 +398,7 @@ Database *database = ctx->db(); assert( database->name == db ); - replCheckCloseDatabase( database ); + oplogCheckCloseDatabase( database ); if( BackgroundOperation::inProgForDb(db) ) { log() << "warning: bg op in prog during close db? " << db << endl; @@ -425,13 +426,14 @@ BSONObj query = d.nextJsObj(); assert( d.moreJSObjs() ); - assert( query.objsize() < m.data->dataLen() ); + assert( query.objsize() < m.header()->dataLen() ); BSONObj toupdate = d.nextJsObj(); uassert( 10055 , "update object too large", toupdate.objsize() <= MaxBSONObjectSize); - assert( toupdate.objsize() < m.data->dataLen() ); - assert( query.objsize() + toupdate.objsize() < m.data->dataLen() ); + assert( toupdate.objsize() < m.header()->dataLen() ); + assert( query.objsize() + toupdate.objsize() < m.header()->dataLen() ); bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; + bool broadcast = flags & UpdateOption_Broadcast; { string s = query.toString(); /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down. @@ -443,10 +445,15 @@ } mongolock lk(1); + + // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit + if ( ! 
broadcast && handlePossibleShardedMessage( m , 0 ) ) + return; + Client::Context ctx( ns ); UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, true, op.debug() ); - recordUpdate( res.existing , (int) res.num ); // for getlasterror + lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror } void receivedDelete(Message& m, CurOp& op) { @@ -455,7 +462,8 @@ assert(*ns); uassert( 10056 , "not master", isMasterNs( ns ) ); int flags = d.pullInt(); - bool justOne = flags & 1; + bool justOne = flags & RemoveOption_JustOne; + bool broadcast = flags & RemoveOption_Broadcast; assert( d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); { @@ -465,10 +473,14 @@ } writelock lk(ns); + // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit + if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) ) + return; + Client::Context ctx(ns); - + long long n = deleteObjects(ns, pattern, justOne, true); - recordDelete( (int) n ); + lastError.getSafe()->recordDelete( n ); } QueryResult* emptyMoreResult(long long); @@ -483,26 +495,48 @@ int ntoreturn = d.pullInt(); long long cursorid = d.pullInt64(); - ss << ns << " cid:" << cursorid << " ntoreturn:" << ntoreturn;; + ss << ns << " cid:" << cursorid; + if( ntoreturn ) + ss << " ntoreturn:" << ntoreturn; + int pass = 0; + bool exhaust = false; QueryResult* msgdata; - try { - mongolock lk(false); - Client::Context ctx(ns); - msgdata = getMore(ns, ntoreturn, cursorid, curop); - } - catch ( AssertionException& e ) { - ss << " exception " << e.toString(); - msgdata = emptyMoreResult(cursorid); - ok = false; - } + while( 1 ) { + try { + mongolock lk(false); + Client::Context ctx(ns); + msgdata = processGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust); + } + catch ( GetMoreWaitException& ) { + exhaust = false; + massert(13073, "shutting down", !inShutdown() ); + pass++; + DEV + sleepmillis(20); + else + sleepmillis(2); + continue; + } + catch ( AssertionException& e ) { + exhaust = false; + ss << " exception " << e.toString(); + msgdata = emptyMoreResult(cursorid); + ok = false; + } + break; + }; + Message *resp = new Message(); resp->setData(msgdata, true); - ss << " bytes:" << resp->data->dataLen(); + ss << " bytes:" << resp->header()->dataLen(); ss << " nreturned:" << msgdata->nReturned; dbresponse.response = resp; - dbresponse.responseTo = m.data->id; - + dbresponse.responseTo = m.header()->id; + if( exhaust ) { + ss << " exhaust "; + dbresponse.exhaust = ns; + } return ok; } @@ -514,12 +548,17 @@ op.debug().str << ns; writelock lk(ns); + + if ( handlePossibleShardedMessage( m , 0 ) ) + return; + Client::Context ctx(ns); while ( d.moreJSObjs() ) { BSONObj js = d.nextJsObj(); uassert( 10059 , "object to insert too large", js.objsize() <= MaxBSONObjectSize); - theDataFileMgr.insert(ns, js, false); + theDataFileMgr.insertWithObjMod(ns, js, false); logOp("i", ns, js); + globalOpCounters.gotInsert(); } } @@ -538,15 +577,15 @@ Message & container; }; - void getDatabaseNames( vector< string > &names ) { - boost::filesystem::path path( dbpath ); + void getDatabaseNames( vector< string > &names , const string& usePath ) { + boost::filesystem::path path( usePath ); for ( boost::filesystem::directory_iterator i( path ); i != boost::filesystem::directory_iterator(); ++i ) { if ( directoryperdb ) { boost::filesystem::path p = *i; string dbName = p.leaf(); p /= ( dbName + ".ns" ); - if ( boost::filesystem::exists( p ) ) + if ( MMF::exists( p ) ) names.push_back( dbName ); } else {
string fileName = boost::filesystem::path(*i).leaf(); @@ -556,12 +595,34 @@ } } + /* returns true if there is data on this server. useful when starting replication. + local database does NOT count except for rsoplog collection. + */ + bool replHasDatabases() { + vector names; + getDatabaseNames(names); + if( names.size() >= 2 ) return true; + if( names.size() == 1 ){ + if( names[0] != "local" ) + return true; + // we have a local database. return true if oplog isn't empty + { + readlock lk(rsoplog); + BSONObj o; + if( Helpers::getFirst(rsoplog, o) ) + return true; + } + } + return false; + } + bool DBDirectClient::call( Message &toSend, Message &response, bool assertOk ) { if ( lastError._get() ) lastError.startRequest( toSend, lastError._get() ); DbResponse dbResponse; assembleResponse( toSend, dbResponse ); assert( dbResponse.response ); + dbResponse.response->concat(); // can get rid of this if we make response handling smarter response = *dbResponse.response; return true; } @@ -583,14 +644,17 @@ //throw UserException( (string)"yay:" + ns ); } + void DBDirectClient::killCursor( long long id ){ + ClientCursor::erase( id ); + } DBClientBase * createDirectClient(){ return new DBDirectClient(); } - void recCacheCloseAll(); + //void recCacheCloseAll(); - mongo::mutex exitMutex; + mongo::mutex exitMutex("exit"); int numExitCalls = 0; void shutdown(); @@ -642,63 +706,82 @@ catch ( ... ){ tryToOutputFatal( "shutdown failed with exception" ); } + + try { + mutexDebugger.programEnding(); + } + catch (...) { } - tryToOutputFatal( "dbexit: really exiting now\n" ); + tryToOutputFatal( "dbexit: really exiting now" ); if ( c ) c->shutdown(); ::exit(rc); } void shutdown() { - log() << "\t shutdown: going to close listening sockets..." << endl; + log() << "shutdown: going to close listening sockets..." << endl; ListeningSockets::get()->closeAll(); - log() << "\t shutdown: going to flush oplog..." << endl; + log() << "shutdown: going to flush oplog..." << endl; stringstream ss2; flushOpLog( ss2 ); rawOut( ss2.str() ); /* must do this before unmapping mem or you may get a seg fault */ - log() << "\t shutdown: going to close sockets..." << endl; - boost::thread close_socket_thread(closeAllSockets); + log() << "shutdown: going to close sockets..." << endl; + boost::thread close_socket_thread( boost::bind(MessagingPort::closeAllSockets, 0) ); // wait until file preallocation finishes // we would only hang here if the file_allocator code generates a // synchronous signal, which we don't expect - log() << "\t shutdown: waiting for fs preallocator..." << endl; + log() << "shutdown: waiting for fs preallocator..." << endl; theFileAllocator().waitUntilFinished(); - log() << "\t shutdown: closing all files..." << endl; + log() << "shutdown: closing all files..." << endl; stringstream ss3; MemoryMappedFile::closeAllFiles( ss3 ); rawOut( ss3.str() ); // should we be locked here? we aren't. might be ok as-is. - recCacheCloseAll(); + //recCacheCloseAll(); #if !defined(_WIN32) && !defined(__sunos__) if ( lockFile ){ - log() << "\t shutdown: removing fs lock..." << endl; + log() << "shutdown: removing fs lock..." 
<< endl; if( ftruncate( lockFile , 0 ) ) - log() << "\t couldn't remove fs lock " << OUTPUT_ERRNO << endl; + log() << "couldn't remove fs lock " << errnoWithDescription() << endl; flock( lockFile, LOCK_UN ); } #endif } - void acquirePathLock() { #if !defined(_WIN32) && !defined(__sunos__) - string name = ( boost::filesystem::path( dbpath ) / "mongod.lock" ).native_file_string(); + void writePid(int fd) { + stringstream ss; + ss << getpid() << endl; + string s = ss.str(); + const char * data = s.c_str(); + assert ( write( fd, data, strlen( data ) ) ); + } + + void acquirePathLock() { + string name = ( boost::filesystem::path( dbpath ) / "mongod.lock" ).native_file_string(); bool oldFile = false; - if ( boost::filesystem::exists( name ) && boost::filesystem::file_size( name ) > 0 ){ + if ( boost::filesystem::exists( name ) && boost::filesystem::file_size( name ) > 0 ) { oldFile = true; } - - lockFile = open( name.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO ); - uassert( 10309 , "Unable to create / open lock file for dbpath: " + name, lockFile > 0 ); - uassert( 10310 , "Unable to acquire lock for dbpath: " + name, flock( lockFile, LOCK_EX | LOCK_NB ) == 0 ); + + lockFile = open( name.c_str(), O_RDWR | O_CREAT , S_IRWXU | S_IRWXG | S_IRWXO ); + if( lockFile <= 0 ) { + uasserted( 10309 , str::stream() << "Unable to create / open lock file for lockfilepath: " << name << ' ' << errnoWithDescription()); + } + if (flock( lockFile, LOCK_EX | LOCK_NB ) != 0) { + close ( lockFile ); + lockFile = 0; + uassert( 10310 , "Unable to acquire lock for lockfilepath: " + name, 0 ); + } if ( oldFile ){ // we check this here because we want to see if we can get the lock @@ -708,17 +791,19 @@ << "recommend removing file and running --repair\n" << "see: http://dochub.mongodb.org/core/repair for more information\n" << "*************" << endl; + close ( lockFile ); + lockFile = 0; uassert( 12596 , "old lock file" , 0 ); } - - stringstream ss; - ss << getpid() << endl; - string s = ss.str(); - const char * data = s.c_str(); - assert( write( lockFile , data , strlen( data ) ) ); + uassert( 13342, "Unable to truncate lock file", ftruncate(lockFile, 0) == 0); + writePid( lockFile ); fsync( lockFile ); -#endif } +#else + void acquirePathLock() { + // TODO - this is very bad + } +#endif } // namespace mongo diff -Nru mongodb-1.4.4/db/instance.h mongodb-1.6.3/db/instance.h --- mongodb-1.4.4/db/instance.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/instance.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,6 +19,7 @@ #pragma once + #include "../client/dbclient.h" #include "curop.h" #include "security.h" @@ -29,9 +30,6 @@ extern string dbExecCommand; -#define OPWRITE if( _diaglog.level & 1 ) _diaglog.write((char *) m.data, m.data->len); -#define OPREAD if( _diaglog.level & 2 ) _diaglog.readop((char *) m.data, m.data->len); - struct DiagLog { ofstream *f; /* 0 = off; 1 = writes, 2 = reads, 3 = both @@ -40,7 +38,7 @@ int level; mongo::mutex mutex; - DiagLog() : f(0) , level(0) { } + DiagLog() : f(0) , level(0), mutex("DiagLog") { } void init() { if ( ! f && level ){ log() << "diagLogging = " << level << endl; @@ -96,21 +94,23 @@ struct DbResponse { Message *response; MSGID responseTo; - DbResponse(Message *r, MSGID rt) : response(r), responseTo(rt) { - } + const char *exhaust; /* points to ns if exhaust mode. 
0=normal mode*/ + DbResponse(Message *r, MSGID rt) : response(r), responseTo(rt), exhaust(0) { } DbResponse() { response = 0; + exhaust = 0; } - ~DbResponse() { - delete response; - } + ~DbResponse() { delete response; } }; - - static SockAddr unknownAddress( "0.0.0.0", 0 ); - bool assembleResponse( Message &m, DbResponse &dbresponse, const sockaddr_in &client = unknownAddress.sa ); + bool assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client = unknownAddress ); + + void getDatabaseNames( vector< string > &names , const string& usePath = dbpath ); - void getDatabaseNames( vector< string > &names ); + /* returns true if there is no data on this server. useful when starting replication. + local database does NOT count. + */ + bool replHasDatabases(); // --- local client --- @@ -119,7 +119,7 @@ public: virtual auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); - + virtual bool isFailed() const { return false; } @@ -135,9 +135,19 @@ // don't need to piggy back when connected locally return say( toSend ); } + + virtual void killCursor( long long cursorID ); + + virtual bool callRead( Message& toSend , Message& response ){ + return call( toSend , response ); + } + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } + virtual bool isMember( const DBConnector * conn ) const { return this == conn; }; }; extern int lockFile; void acquirePathLock(); + void maybeCreatePidFile(); } // namespace mongo diff -Nru mongodb-1.4.4/db/introspect.cpp mongodb-1.6.3/db/introspect.cpp --- mongodb-1.4.4/db/introspect.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/introspect.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,9 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "introspect.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include "../util/goodies.h" #include "pdfile.h" #include "jsobj.h" diff -Nru mongodb-1.4.4/db/introspect.h mongodb-1.6.3/db/introspect.h --- mongodb-1.4.4/db/introspect.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/introspect.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "pdfile.h" diff -Nru mongodb-1.4.4/db/jsobj.cpp mongodb-1.6.3/db/jsobj.cpp --- mongodb-1.4.4/db/jsobj.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/jsobj.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,11 +17,10 @@ * limitations under the License. 
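The acquirePathLock() rework shown a bit earlier reorders the steps: open mongod.lock without O_TRUNC, take the flock, and only then truncate and write the pid, so a stale non-empty lock file can still be detected before it is clobbered. A self-contained POSIX sketch of that protocol; the function name and error handling are illustrative:

#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>
#include <cstdio>

// Sketch of the lock-file protocol used by acquirePathLock() (POSIX only).
int lockDbPath( const char *name ) {                 // name: e.g. dbpath + "/mongod.lock"
    int fd = open( name, O_RDWR | O_CREAT, 0777 );   // no O_TRUNC: preserve any stale contents
    if ( fd <= 0 )
        return -1;                                   // could not create/open the file
    if ( flock( fd, LOCK_EX | LOCK_NB ) != 0 ) {     // another mongod already holds it
        close( fd );
        return -1;
    }
    if ( ftruncate( fd, 0 ) != 0 ) {                 // only the lock holder may clear it
        close( fd );
        return -1;
    }
    char buf[32];
    int n = snprintf( buf, sizeof(buf), "%d\n", (int) getpid() );
    if ( write( fd, buf, n ) != n ) {
        close( fd );
        return -1;
    }
    fsync( fd );
    return fd;                                       // keep open for the life of the process
}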
*/ -#include "stdafx.h" +#include "pch.h" #include "jsobj.h" #include "nonce.h" -#include "../util/atomic_int.h" -#include "../util/goodies.h" +#include "../bson/util/atomic_int.h" #include "../util/base64.h" #include "../util/md5.hpp" #include @@ -31,9 +30,8 @@ #include "jsobjmanipulator.h" #include "../util/optime.h" #include -#include #undef assert -#define assert xassert +#define assert MONGO_assert // make sure our assumptions are valid BOOST_STATIC_ASSERT( sizeof(int) == 4 ); @@ -46,114 +44,12 @@ BSONElement nullElement; - ostream& operator<<( ostream &s, const OID &o ) { - s << o.str(); - return s; - } - - IDLabeler GENOID; + GENOIDLabeler GENOID; DateNowLabeler DATENOW; - string BSONElement::toString( bool includeFieldName ) const { - stringstream s; - if ( includeFieldName && type() != EOO ) - s << fieldName() << ": "; - switch ( type() ) { - case EOO: - return "EOO"; - case Date: - s << "new Date(" << date() << ')'; - break; - case RegEx: - { - s << "/" << regex() << '/'; - const char *p = regexFlags(); - if ( p ) s << p; - } - break; - case NumberDouble: - { - stringstream tmp; - tmp.precision( 16 ); - tmp << number(); - string n = tmp.str(); - s << n; - // indicate this is a double: - if( strchr(n.c_str(), '.') == 0 && strchr(n.c_str(), 'E') == 0 && strchr(n.c_str(), 'N') == 0 ) - s << ".0"; - } - break; - case NumberLong: - s << _numberLong(); - break; - case NumberInt: - s << _numberInt(); - break; - case Bool: - s << ( boolean() ? "true" : "false" ); - break; - case Object: - case Array: - s << embeddedObject().toString(); - break; - case Undefined: - s << "undefined"; - break; - case jstNULL: - s << "null"; - break; - case MaxKey: - s << "MaxKey"; - break; - case MinKey: - s << "MinKey"; - break; - case CodeWScope: - s << "CodeWScope( " - << codeWScopeCode() << ", " << codeWScopeObject().toString() << ")"; - break; - case Code: - if ( valuestrsize() > 80 ) - s << string(valuestr()).substr(0, 70) << "..."; - else { - s << valuestr(); - } - break; - case Symbol: - case String: - if ( valuestrsize() > 80 ) - s << '"' << string(valuestr()).substr(0, 70) << "...\""; - else { - s << '"' << valuestr() << '"'; - } - break; - case DBRef: - s << "DBRef('" << valuestr() << "',"; - { - OID *x = (OID *) (valuestr() + valuestrsize()); - s << *x << ')'; - } - break; - case jstOID: - s << "ObjId("; - s << __oid() << ')'; - break; - case BinData: - s << "BinData"; - break; - case Timestamp: - s << "Timestamp " << timestampTime() << "|" << timestampInc(); - break; - default: - s << "?type=" << type(); - break; - } - return s.str(); - } - string escape( string s , bool escape_slash=false) { - stringstream ret; + StringBuilder ret; for ( string::iterator i = s.begin(); i != s.end(); ++i ) { switch ( *i ) { case '"': @@ -182,11 +78,9 @@ break; default: if ( *i >= 0 && *i <= 0x1f ) { - ret << "\\u"; - ret << hex; - ret.width( 4 ); - ret.fill( '0' ); - ret << int( *i ); + //TODO: these should be utf16 code-units not bytes + char c = *i; + ret << "\\u00" << toHexLower(&c, 1); } else { ret << *i; } @@ -195,14 +89,18 @@ return ret.str(); } - string BSONElement::jsonString( JsonStringFormat format, bool includeFieldNames ) const { + string BSONElement::jsonString( JsonStringFormat format, bool includeFieldNames, int pretty ) const { + BSONType t = type(); + if ( t == Undefined ) + return ""; + stringstream s; if ( includeFieldNames ) s << '"' << escape( fieldName() ) << "\" : "; switch ( type() ) { - case String: + case mongo::String: case Symbol: - s << '"' << escape( valuestr() ) << '"'; + s << '"' 
<< escape( string(valuestr(), valuestrsize()-1) ) << '"'; break; case NumberLong: s << _numberLong(); @@ -214,22 +112,22 @@ s.precision( 16 ); s << number(); } else { - stringstream ss; + StringBuilder ss; ss << "Number " << number() << " cannot be represented in JSON"; string message = ss.str(); massert( 10311 , message.c_str(), false ); } break; - case Bool: + case mongo::Bool: s << ( boolean() ? "true" : "false" ); break; case jstNULL: s << "null"; break; case Object: - s << embeddedObject().jsonString( format ); + s << embeddedObject().jsonString( format, pretty ); break; - case Array: { + case mongo::Array: { if ( embeddedObject().isEmpty() ) { s << "[]"; break; @@ -239,7 +137,12 @@ BSONElement e = i.next(); if ( !e.eoo() ) while ( 1 ) { - s << e.jsonString( format, false ); + if( pretty ) { + s << '\n'; + for( int x = 0; x < pretty; x++ ) + s << " "; + } + s << e.jsonString( format, false, pretty?pretty+1:0 ); e = i.next(); if ( e.eoo() ) break; @@ -249,7 +152,7 @@ break; } case DBRef: { - OID *x = (OID *) (valuestr() + valuestrsize()); + mongo::OID *x = (mongo::OID *) (valuestr() + valuestrsize()); if ( format == TenGen ) s << "Dbref( "; else @@ -290,12 +193,18 @@ s << "\" }"; break; } - case Date: + case mongo::Date: if ( format == Strict ) s << "{ \"$date\" : "; else s << "Date( "; - s << date(); + if( pretty ) { + Date_t d = date(); + if( d == 0 ) s << '0'; + else + s << '"' << date().toString() << '"'; + } else + s << date(); if ( format == Strict ) s << " }"; else @@ -321,16 +230,34 @@ } break; + case CodeWScope: { + BSONObj scope = codeWScopeObject(); + if ( ! scope.isEmpty() ){ + s << "{ \"$code\" : " << _asCode() << " , " + << " \"$scope\" : " << scope.jsonString() << " }"; + break; + } + } + + case Code: - s << ascode(); + s << _asCode(); break; - + case Timestamp: s << "{ \"t\" : " << timestampTime() << " , \"i\" : " << timestampInc() << " }"; break; + case MinKey: + s << "{ \"$minKey\" : 1 }"; + break; + + case MaxKey: + s << "{ \"$maxKey\" : 1 }"; + break; + default: - stringstream ss; + StringBuilder ss; ss << "Cannot create a properly formatted JSON string with " << "element: " << toString() << " of type: " << type(); string message = ss.str(); @@ -339,82 +266,6 @@ return s.str(); } - int BSONElement::size( int maxLen ) const { - if ( totalSize >= 0 ) - return totalSize; - - int remain = maxLen - fieldNameSize() - 1; - - int x = 0; - switch ( type() ) { - case EOO: - case Undefined: - case jstNULL: - case MaxKey: - case MinKey: - break; - case Bool: - x = 1; - break; - case NumberInt: - x = 4; - break; - case Timestamp: - case Date: - case NumberDouble: - case NumberLong: - x = 8; - break; - case jstOID: - x = 12; - break; - case Symbol: - case Code: - case String: - massert( 10313 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4; - break; - case CodeWScope: - massert( 10314 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = objsize(); - break; - - case DBRef: - massert( 10315 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4 + 12; - break; - case Object: - case Array: - massert( 10316 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = objsize(); - break; - case BinData: - massert( 10317 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4 + 1/*subtype*/; - break; - case RegEx: - { - const char *p = value(); - int len1 = ( maxLen == -1 ) ? 
strlen( p ) : strnlen( p, remain ); - massert( 10318 , "Invalid regex string", len1 != -1 ); - p = p + len1 + 1; - int len2 = ( maxLen == -1 ) ? strlen( p ) : strnlen( p, remain - len1 - 1 ); - massert( 10319 , "Invalid regex options string", len2 != -1 ); - x = len1 + 1 + len2 + 1; - } - break; - default: { - stringstream ss; - ss << "BSONElement: bad type " << (int) type(); - string msg = ss.str(); - massert( 10320 , msg.c_str(),false); - } - } - totalSize = x + fieldNameSize() + 1; // BSONType - - return totalSize; - } - int BSONElement::getGtLtOp( int def ) const { const char *fn = fieldName(); if ( fn[0] == '$' && fn[1] ) { @@ -434,8 +285,12 @@ if ( fn[3] == 'a' && fn[4] == 'r' && fn[5] == 0 ) return BSONObj::opNEAR; } - else if ( fn[1] == 'm' && fn[2] == 'o' && fn[3] == 'd' && fn[4] == 0 ) - return BSONObj::opMOD; + else if ( fn[1] == 'm' ){ + if ( fn[2] == 'o' && fn[3] == 'd' && fn[4] == 0 ) + return BSONObj::opMOD; + if ( fn[2] == 'a' && fn[3] == 'x' && fn[4] == 'D' && fn[5] == 'i' && fn[6] == 's' && fn[7] == 't' && fn[8] == 'a' && fn[9] == 'n' && fn[10] == 'c' && fn[11] == 'e' && fn[12] == 0 ) + return BSONObj::opMAX_DISTANCE; + } else if ( fn[1] == 't' && fn[2] == 'y' && fn[3] == 'p' && fn[4] == 'e' && fn[5] == 0 ) return BSONObj::opTYPE; else if ( fn[1] == 'i' && fn[2] == 'n' && fn[3] == 0 ) @@ -579,40 +434,6 @@ return -1; } - void BSONElement::validate() const { - switch( type() ) { - case DBRef: - case Code: - case Symbol: - case String: { - int x = valuestrsize(); - if ( x > 0 && valuestr()[x-1] == 0 ) - return; - StringBuilder buf; - buf << "Invalid dbref/code/string/symbol size: " << x << " strnlen:" << strnlen( valuestr() , x ); - massert( 10321 , buf.str() , 0 ); - break; - } - case CodeWScope: { - int totalSize = *( int * )( value() ); - massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 ); - int strSizeWNull = *( int * )( value() + 4 ); - massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 ); - massert( 10324 , "Invalid CodeWScope string size", - strSizeWNull > 0 && - strSizeWNull - 1 == strnlen( codeWScopeCode(), strSizeWNull ) ); - massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 ); - int objSize = *( int * )( value() + 4 + 4 + strSizeWNull ); - massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize ); - // Subobject validation handled elsewhere. - } - case Object: - // We expect Object size validation to be handled elsewhere. 
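The hand-rolled matching in getGtLtOp() above decodes query operator names one character at a time; $maxDistance is the operator added in this release. A table-driven restatement of the same mapping, as a sketch; the enum spellings follow BSONObj's match-type constants, and only the operators visible in the hunk are listed:

// Equivalent lookup table for the operator names decoded above:
struct OpName { const char *name; int op; };
static const OpName ops[] = {
    { "$gt",          BSONObj::GT },
    { "$lt",          BSONObj::LT },
    { "$mod",         BSONObj::opMOD },
    { "$maxDistance", BSONObj::opMAX_DISTANCE },   // new here: pairs with $near
    { "$type",        BSONObj::opTYPE },
    { "$in",          BSONObj::opIN },
    { "$near",        BSONObj::opNEAR },
};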
- default: - break; - } - } - /* Matcher --------------------------------------*/ // If the element is something like: @@ -676,39 +497,6 @@ /* BSONObj ------------------------------------------------------------*/ - BSONObj::EmptyObject BSONObj::emptyObject; - - string BSONObj::toString() const { - if ( isEmpty() ) return "{}"; - - stringstream s; - s << "{ "; - BSONObjIterator i(*this); - bool first = true; - while ( 1 ) { - massert( 10327 , "Object does not end with EOO", i.moreWithEOO() ); - BSONElement e = i.next( true ); - massert( 10328 , "Invalid element size", e.size() > 0 ); - massert( 10329 , "Element too large", e.size() < ( 1 << 30 ) ); - int offset = e.rawdata() - this->objdata(); - massert( 10330 , "Element extends past end of object", - e.size() + offset <= this->objsize() ); - e.validate(); - bool end = ( e.size() + offset == this->objsize() ); - if ( e.eoo() ) { - massert( 10331 , "EOO Before end of object", end ); - break; - } - if ( first ) - first = false; - else - s << ", "; - s << e.toString(); - } - s << " }"; - return s.str(); - } - string BSONObj::md5() const { md5digest d; md5_state_t st; @@ -718,21 +506,29 @@ return digestToString( d ); } - string BSONObj::jsonString( JsonStringFormat format ) const { + string BSONObj::jsonString( JsonStringFormat format, int pretty ) const { if ( isEmpty() ) return "{}"; - stringstream s; + StringBuilder s; s << "{ "; BSONObjIterator i(*this); BSONElement e = i.next(); if ( !e.eoo() ) while ( 1 ) { - s << e.jsonString( format ); + s << e.jsonString( format, true, pretty?pretty+1:0 ); e = i.next(); if ( e.eoo() ) break; - s << ", "; + s << ","; + if ( pretty ) { + s << '\n'; + for( int x = 0; x < pretty; x++ ) + s << " "; + } + else { + s << " "; + } } s << " }"; return s.str(); @@ -740,13 +536,60 @@ // todo: can be a little faster if we don't use toString() here. bool BSONObj::valid() const { - try { - toString(); + try{ + BSONObjIterator it(*this); + while( it.moreWithEOO() ){ + // both throw exception on failure + BSONElement e = it.next(true); + e.validate(); + + if (e.eoo()){ + if (it.moreWithEOO()) + return false; + return true; + }else if (e.isABSONObj()){ + if(!e.embeddedObject().valid()) + return false; + }else if (e.type() == CodeWScope){ + if(!e.codeWScopeObject().valid()) + return false; + } + } + } catch (...) { } - catch (...) { - return false; + return false; + } + + int BSONObj::woCompare(const BSONObj& r, const Ordering &o, bool considerFieldName) const { + if ( isEmpty() ) + return r.isEmpty() ? 0 : -1; + if ( r.isEmpty() ) + return 1; + + BSONObjIterator i(*this); + BSONObjIterator j(r); + unsigned mask = 1; + while ( 1 ) { + // so far, equal... + + BSONElement l = i.next(); + BSONElement r = j.next(); + if ( l.eoo() ) + return r.eoo() ? 0 : -1; + if ( r.eoo() ) + return 1; + + int x; + { + x = l.woCompare( r, considerFieldName ); + if( o.descending(mask) ) + x = -x; + } + if ( x != 0 ) + return x; + mask <<= 1; } - return true; + return -1; } /* well ordered compare */ @@ -796,7 +639,7 @@ BSONObj staticNull = fromjson( "{'':null}" ); /* well ordered compare */ - int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey ) const{ + int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const{ if ( isEmpty() ) return other.isEmpty() ? 0 : -1; if ( other.isEmpty() ) @@ -810,10 +653,10 @@ if ( f.eoo() ) return 0; - BSONElement l = getField( f.fieldName() ); + BSONElement l = useDotted ? 
getFieldDotted( f.fieldName() ) : getField( f.fieldName() ); if ( l.eoo() ) l = staticNull.firstElement(); - BSONElement r = other.getField( f.fieldName() ); + BSONElement r = useDotted ? other.getFieldDotted( f.fieldName() ) : other.getField( f.fieldName() ); if ( r.eoo() ) r = staticNull.firstElement(); @@ -826,78 +669,46 @@ return -1; } - - /* return has eoo() true if no match - supports "." notation to reach into embedded objects - */ - BSONElement BSONObj::getFieldDotted(const char *name) const { + void BSONObj::getFieldsDotted(const StringData& name, BSONElementSet &ret ) const { BSONElement e = getField( name ); if ( e.eoo() ) { - const char *p = strchr(name, '.'); + const char *p = strchr(name.data(), '.'); if ( p ) { - string left(name, p-name); - BSONObj sub = getObjectField(left.c_str()); - return sub.isEmpty() ? nullElement : sub.getFieldDotted(p+1); - } - } - - return e; - } - - void BSONObj::getFieldsDotted(const char *name, BSONElementSet &ret ) const { - BSONObjIterator i(*this); - while ( i.more() ){ - BSONElement e = i.next(); - FieldCompareResult cmp = compareDottedFieldNames( name , e.fieldName() ); - switch ( cmp ){ - - case LEFT_BEFORE: - case RIGHT_BEFORE: - break; - - case RIGHT_SUBFIELD: - assert(0); - break; - - case LEFT_SUBFIELD: { - const char * next = name + strlen( e.fieldName() ) + 1; - bool allDigits = false; - if ( isdigit( *next ) ){ - const char * temp = next + 1; - while ( isdigit( *temp ) ) - temp++; - allDigits = *temp == '.'; - } - - if ( e.type() == Object || allDigits ){ - e.embeddedObject().getFieldsDotted( next , ret ); - } - else if ( e.type() == Array ){ - BSONObjIterator j( e.embeddedObject() ); - while ( j.more() ){ - BSONElement f = j.next(); - if ( f.type() == Object ) - f.embeddedObject().getFieldsDotted( next , ret ); + string left(name.data(), p-name.data()); + const char* next = p+1; + BSONElement e = getField( left.c_str() ); + + if (e.type() == Object){ + e.embeddedObject().getFieldsDotted(next, ret); + } else if (e.type() == Array) { + bool allDigits = false; + if ( isdigit( *next ) ){ + const char * temp = next + 1; + while ( isdigit( *temp ) ) + temp++; + allDigits = *temp == '.'; } + if (allDigits) { + e.embeddedObject().getFieldsDotted(next, ret); + } else { + BSONObjIterator i(e.embeddedObject()); + while ( i.more() ){ + BSONElement e2 = i.next(); + if (e2.type() == Object || e2.type() == Array) + e2.embeddedObject().getFieldsDotted(next, ret); + } + } + } else { + // do nothing: no match } - else { - // intentially left blank, this means no match - } - return; - } - - case SAME: { - if ( e.type() == Array ){ - BSONObjIterator j( e.embeddedObject() ); - while ( j.more() ) - ret.insert( j.next() ); - } - else { - ret.insert( e ); - } - return; } - + } else { + if (e.type() == Array){ + BSONObjIterator i(e.embeddedObject()); + while ( i.more() ) + ret.insert(i.next()); + } else { + ret.insert(e); } } } @@ -915,7 +726,7 @@ BSONElement sub = getField(left.c_str()); if ( sub.eoo() ) return nullElement; - else if ( sub.type() == Array || strlen( name ) == 0 ) + else if ( sub.type() == Array || name[0] == '\0') return sub; else if ( sub.type() == Object ) return sub.embeddedObject().getFieldDottedOrArray( name ); @@ -923,31 +734,6 @@ return nullElement; } - /* makes a new BSONObj with the fields specified in pattern. - fields returned in the order they appear in pattern. - if any field missing or undefined in the original object, that field - in the output will be null. 
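The rewritten getFieldsDotted() above walks a dotted path and fans out through any array it meets, which is what multikey index key extraction depends on. A usage sketch; values are illustrative:

// Dotted lookup through an array, per getFieldsDotted() above:
BSONObj o = BSON( "a" << BSON_ARRAY( BSON( "b" << 1 ) << BSON( "b" << 2 ) ) );
BSONElementSet s;
o.getFieldsDotted( "a.b", s );
// s now holds both matches, b:1 and b:2 -- one element per array member.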
- - n^2 implementation bad if pattern and object have lots - of fields - normally pattern doesn't so should be fine. - */ - BSONObj BSONObj::extractFieldsDotted(BSONObj pattern) const { - BSONObjBuilder b; - BSONObjIterator i(pattern); - while (i.more()) { - BSONElement e = i.next(); - const char *name = e.fieldName(); - - BSONElement x = getFieldDotted( name ); - if ( x.eoo() || x.type() == Undefined ) { - b.appendNull(name); - } else { - b.appendAs(x, name); - } - } - return b.done(); - } - /** sets element field names to empty string If a field in pattern is missing, it is omitted from the returned @@ -1037,24 +823,6 @@ return e.type() == String ? e.valuestr() : ""; } - BSONObj BSONObj::getObjectField(const char *name) const { - BSONElement e = getField(name); - BSONType t = e.type(); - return t == Object || t == Array ? e.embeddedObject() : BSONObj(); - } - - int BSONObj::nFields() const { - int n = 0; - BSONObjIterator i(*this); - while ( i.moreWithEOO() ) { - BSONElement e = i.next(); - if ( e.eoo() ) - break; - n++; - } - return n; - } - /* grab names of all the fields in this object */ int BSONObj::getFieldNames(set& fields) const { int n = 0; @@ -1186,6 +954,18 @@ return true; } + void BSONObj::dump() const { + out() << hex; + const char *p = objdata(); + for ( int i = 0; i < objsize(); i++ ) { + out() << i << '\t' << ( 0xff & ( (unsigned) *p ) ); + if ( *p >= 'A' && *p <= 'z' ) + out() << '\t' << *p; + out() << endl; + p++; + } + } + string BSONObj::hexDump() const { stringstream ss; const char *d = objdata(); @@ -1202,14 +982,6 @@ return ss.str(); } - ostream& operator<<( ostream &s, const BSONObj &o ) { - return s << o.toString(); - } - - ostream& operator<<( ostream &s, const BSONElement &e ) { - return s << e.toString(); - } - void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base){ BSONObjIterator it(obj); while (it.more()){ @@ -1219,7 +991,7 @@ nested2dotted(b, e.embeddedObject(), newbase); }else{ string newbase = base + e.fieldName(); - b.appendAs(e, newbase.c_str()); + b.appendAs(e, newbase); } } } @@ -1265,6 +1037,7 @@ } minkeydata; BSONObj minKey((const char *) &minkeydata); +/* struct JSObj0 { JSObj0() { totsize = 5; @@ -1273,14 +1046,9 @@ int totsize; char eoo; } js0; +*/ #pragma pack() - BSONElement::BSONElement() { - data = &js0.eoo; - fieldNameSize_ = 0; - totalSize = 1; - } - struct BsonUnitTest : public UnitTest { void testRegex() { @@ -1425,14 +1193,8 @@ } */ - unsigned OID::_machine = (unsigned) security.getNonceInitSafe(); - void OID::newState(){ - // using fresh Security object to avoid buffered devrandom - _machine = (unsigned) Security().getNonce(); - } - void OID::init() { - static AtomicUInt inc = (unsigned) security.getNonce(); + static AtomicUInt inc = getRandomNumber(); unsigned t = (unsigned) time(0); char *T = (char *) &t; data[0] = T[3]; @@ -1451,31 +1213,45 @@ raw[3] = T[0]; } + unsigned OID::_machine = (unsigned) security.getNonceInitSafe(); + void OID::newState(){ + unsigned before = _machine; + // using fresh Security object to avoid buffered devrandom + _machine = (unsigned)security.getNonce(); + assert( _machine != before ); + } + void OID::init( string s ){ assert( s.size() == 24 ); const char *p = s.c_str(); - char buf[3]; - buf[2] = 0; for( int i = 0; i < 12; i++ ) { - buf[0] = p[0]; - buf[1] = p[1]; + data[i] = fromHex(p); p += 2; - stringstream ss(buf); - unsigned z; - ss >> hex >> z; - data[i] = z; } + } -/* - string as = s.substr( 0 , 16 ); - string bs = s.substr( 16 ); + void OID::init(Date_t date, bool max){ + 
int time = (int) (date / 1000); + char* T = (char *) &time; + data[0] = T[3]; + data[1] = T[2]; + data[2] = T[1]; + data[3] = T[0]; - stringstream ssa(as); - ssa >> hex >> a; + if (max) + *(long long*)(data + 4) = 0xFFFFFFFFFFFFFFFFll; + else + *(long long*)(data + 4) = 0x0000000000000000ll; + } - stringstream ssb(bs); - ssb >> hex >> b; -*/ + time_t OID::asTimeT(){ + int time; + char* T = (char *) &time; + T[0] = data[3]; + T[1] = data[2]; + T[2] = data[1]; + T[3] = data[0]; + return time; } Labeler::Label GT( "$gt" ); @@ -1492,84 +1268,83 @@ timestamp = OpTime::now().asDate(); } - - void BSONObjBuilder::appendMinForType( const string& field , int t ){ + void BSONObjBuilder::appendMinForType( const StringData& fieldName , int t ){ switch ( t ){ - case MinKey: appendMinKey( field.c_str() ); return; - case MaxKey: appendMinKey( field.c_str() ); return; + case MinKey: appendMinKey( fieldName ); return; + case MaxKey: appendMinKey( fieldName ); return; case NumberInt: case NumberDouble: case NumberLong: - append( field.c_str() , - numeric_limits::max() ); return; + append( fieldName , - numeric_limits::max() ); return; case jstOID: { OID o; memset(&o, 0, sizeof(o)); - appendOID( field.c_str() , &o); + appendOID( fieldName , &o); return; } - case Bool: appendBool( field.c_str() , false); return; - case Date: appendDate( field.c_str() , 0); return; - case jstNULL: appendNull( field.c_str() ); return; + case Bool: appendBool( fieldName , false); return; + case Date: appendDate( fieldName , 0); return; + case jstNULL: appendNull( fieldName ); return; case Symbol: - case String: append( field.c_str() , "" ); return; - case Object: append( field.c_str() , BSONObj() ); return; + case String: append( fieldName , "" ); return; + case Object: append( fieldName , BSONObj() ); return; case Array: - appendArray( field.c_str() , BSONObj() ); return; + appendArray( fieldName , BSONObj() ); return; case BinData: - appendBinData( field.c_str() , 0 , Function , (const char *) 0 ); return; + appendBinData( fieldName , 0 , Function , (const char *) 0 ); return; case Undefined: - appendUndefined( field.c_str() ); return; - case RegEx: appendRegex( field.c_str() , "" ); return; + appendUndefined( fieldName ); return; + case RegEx: appendRegex( fieldName , "" ); return; case DBRef: { OID o; memset(&o, 0, sizeof(o)); - appendDBRef( field.c_str() , "" , o ); + appendDBRef( fieldName , "" , o ); return; } - case Code: appendCode( field.c_str() , "" ); return; - case CodeWScope: appendCodeWScope( field.c_str() , "" , BSONObj() ); return; - case Timestamp: appendTimestamp( field.c_str() , 0); return; + case Code: appendCode( fieldName , "" ); return; + case CodeWScope: appendCodeWScope( fieldName , "" , BSONObj() ); return; + case Timestamp: appendTimestamp( fieldName , 0); return; }; log() << "type not support for appendMinElementForType: " << t << endl; uassert( 10061 , "type not supported for appendMinElementForType" , false ); } - void BSONObjBuilder::appendMaxForType( const string& field , int t ){ + void BSONObjBuilder::appendMaxForType( const StringData& fieldName , int t ){ switch ( t ){ - case MinKey: appendMaxKey( field.c_str() ); break; - case MaxKey: appendMaxKey( field.c_str() ); break; + case MinKey: appendMaxKey( fieldName ); break; + case MaxKey: appendMaxKey( fieldName ); break; case NumberInt: case NumberDouble: case NumberLong: - append( field.c_str() , numeric_limits::max() ); + append( fieldName , numeric_limits::max() ); break; case BinData: - appendMinForType( field , jstOID ); + appendMinForType( 
fieldName , jstOID ); break; case jstOID: { OID o; memset(&o, 0xFF, sizeof(o)); - appendOID( field.c_str() , &o); + appendOID( fieldName , &o); break; } case Undefined: case jstNULL: - appendMinForType( field , NumberInt ); - case Bool: appendBool( field.c_str() , true); break; - case Date: appendDate( field.c_str() , 0xFFFFFFFFFFFFFFFFLL ); break; + appendMinForType( fieldName , NumberInt ); + case Bool: appendBool( fieldName , true); break; + case Date: appendDate( fieldName , 0xFFFFFFFFFFFFFFFFLL ); break; case Symbol: - case String: append( field.c_str() , BSONObj() ); break; + case String: append( fieldName , BSONObj() ); break; case Code: case CodeWScope: - appendCodeWScope( field.c_str() , "ZZZ" , BSONObj() ); break; + appendCodeWScope( fieldName , "ZZZ" , BSONObj() ); break; case Timestamp: - appendTimestamp( field.c_str() , numeric_limits::max() ); break; + appendTimestamp( fieldName , numeric_limits::max() ); break; default: - appendMinForType( field , t + 1 ); + appendMinForType( fieldName , t + 1 ); } } @@ -1586,8 +1361,8 @@ "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", }; - bool BSONObjBuilder::appendAsNumber( const string& fieldName , const string& data ){ - if ( data.size() == 0 ) + bool BSONObjBuilder::appendAsNumber( const StringData& fieldName , const string& data ){ + if ( data.size() == 0 || data == "-") return false; unsigned int pos=0; @@ -1612,7 +1387,7 @@ if ( hasDec ){ double d = atof( data.c_str() ); - append( fieldName.c_str() , d ); + append( fieldName , d ); return true; } @@ -1665,5 +1440,27 @@ _cur = 0; } + /** transform a BSON array into a vector of BSONElements. + we match array # positions with their vector position, and ignore + any non-numeric fields. + */ + vector BSONElement::Array() const { + chk(mongo::Array); + vector v; + BSONObjIterator i(Obj()); + while( i.more() ) { + BSONElement e = i.next(); + const char *f = e.fieldName(); + try { + unsigned u = stringToNum(f); + assert( u < 4096 ); + if( u >= v.size() ) + v.resize(u+1); + v[u] = e; + } + catch(unsigned) { } + } + return v; + } } // namespace mongo diff -Nru mongodb-1.4.4/db/jsobj.h mongodb-1.6.3/db/jsobj.h --- mongodb-1.4.4/db/jsobj.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/jsobj.h 2010-09-24 10:02:42.000000000 -0700 @@ -23,2024 +23,25 @@ "BSON" stands for "binary JSON" -- ie a binary way to represent objects that would be represented in JSON (plus a few extensions useful for databases & other languages). 
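BSONElement::Array(), added above, places each element at the vector slot named by its numeric field, so sparse arrays come back with empty gaps. A usage sketch using the builder macros; values are illustrative:

// Round-trip through Array():
BSONObj doc = BSON( "tags" << BSON_ARRAY( "x" << "y" ) );   // array fields "0" and "1"
vector<BSONElement> v = doc["tags"].Array();
// v.size() == 2 and v[0].String() == "x"; a document like { "0":"x", "2":"z" }
// would yield v.size() == 3 with v[1] left as an empty (eoo) element.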
- http://www.mongodb.org/display/DOCS/BSON + http://www.bsonspec.org/ */ #pragma once -#include "../stdafx.h" -#include "../util/builder.h" +#include "../pch.h" +#include "../bson/util/builder.h" #include "../util/optime.h" #include "boost/utility.hpp" - #include +#include "../bson/bsontypes.h" +#include "../bson/oid.h" +#include "../bson/bsonelement.h" +#include "../bson/bsonobj.h" +#include "../bson/bsonmisc.h" +#include "../bson/bsonobjbuilder.h" +#include "../bson/bsonobjiterator.h" +#include "../bson/bsoninlines.h" +#include "../bson/ordering.h" +#include "../bson/stringdata.h" -namespace mongo { - - class BSONObj; - struct BSONArray; // empty subclass of BSONObj useful for overloading - class BSONElement; - class Record; - class BSONObjBuilder; - class BSONArrayBuilder; - class BSONObjBuilderValueStream; - -#pragma pack(1) - - /** - the complete list of valid BSON types - */ - enum BSONType { - /** smaller than all other types */ - MinKey=-1, - /** end of object */ - EOO=0, - /** double precision floating point value */ - NumberDouble=1, - /** character string, stored in utf8 */ - String=2, - /** an embedded object */ - Object=3, - /** an embedded array */ - Array=4, - /** binary data */ - BinData=5, - /** Undefined type */ - Undefined=6, - /** ObjectId */ - jstOID=7, - /** boolean type */ - Bool=8, - /** date type */ - Date=9, - /** null type */ - jstNULL=10, - /** regular expression, a pattern with options */ - RegEx=11, - /** deprecated / will be redesigned */ - DBRef=12, - /** deprecated / use CodeWScope */ - Code=13, - /** a programming language (e.g., Python) symbol */ - Symbol=14, - /** javascript code that can execute on the database server, with SavedContext */ - CodeWScope=15, - /** 32 bit signed integer */ - NumberInt = 16, - /** Updated to a Date with value next OpTime on insert */ - Timestamp = 17, - /** 64 bit integer */ - NumberLong = 18, - /** max type that is not MaxKey */ - JSTypeMax=18, - /** larger than all other types */ - MaxKey=127 - }; - - /* subtypes of BinData. - bdtCustom and above are ones that the JS compiler understands, but are - opaque to the database. - */ - enum BinDataType { Function=1, ByteArray=2, bdtUUID = 3, MD5Type=5, bdtCustom=128 }; - - /** Object ID type. - BSON objects typically have an _id field for the object id. This field should be the first - member of the object when present. class OID is a special type that is a 12 byte id which - is likely to be unique to the system. You may also use other types for _id's. - When _id field is missing from a BSON object, on an insert the database may insert one - automatically in certain circumstances. - - Warning: You must call OID::newState() after a fork(). - */ - class OID { - union { - struct{ - long long a; - unsigned b; - }; - unsigned char data[12]; - }; - static unsigned _machine; - public: - /** call this after a fork */ - static void newState(); - - /** initialize to 'null' */ - void clear() { a = 0; b = 0; } - - const unsigned char *getData() const { return data; } - - bool operator==(const OID& r) { - return a==r.a&&b==r.b; - } - bool operator!=(const OID& r) { - return a!=r.a||b!=r.b; - } - - /** The object ID output as 24 hex digits. */ - string str() const { - stringstream s; - s << hex; - // s.fill( '0' ); - // s.width( 2 ); - // fill wasn't working so doing manually... 
- for( int i = 0; i < 8; i++ ) { - unsigned u = data[i]; - if( u < 16 ) s << '0'; - s << u; - } - const unsigned char * raw = (const unsigned char*)&b; - for( int i = 0; i < 4; i++ ) { - unsigned u = raw[i]; - if( u < 16 ) s << '0'; - s << u; - } - /* - s.width( 16 ); - s << a; - s.width( 8 ); - s << b; - s << dec; - */ - return s.str(); - } - - /** - sets the contents to a new oid / randomized value - */ - void init(); - - /** Set to the hex string value specified. */ - void init( string s ); - - }; - ostream& operator<<( ostream &s, const OID &o ); - - /** Formatting mode for generating JSON from BSON. - See - for details. - */ - enum JsonStringFormat { - /** strict RFC format */ - Strict, - /** 10gen format, which is close to JS format. This form is understandable by - javascript running inside the Mongo server via eval() */ - TenGen, - /** Javascript JSON compatible */ - JS - }; - - /* l and r MUST have same type when called: check that first. */ - int compareElementValues(const BSONElement& l, const BSONElement& r); - -#pragma pack() - - /* internals - - -------- size() ------------ - -fieldNameSize- - value() - type() - */ - /** BSONElement represents an "element" in a BSONObj. So for the object { a : 3, b : "abc" }, - 'a : 3' is the first element (key+value). - - The BSONElement object points into the BSONObj's data. Thus the BSONObj must stay in scope - for the life of the BSONElement. - */ - class BSONElement { - friend class BSONObjIterator; - friend class BSONObj; - public: - string toString( bool includeFieldName = true ) const; - operator string() const { return toString(); } - string jsonString( JsonStringFormat format, bool includeFieldNames = true ) const; - - /** Returns the type of the element */ - BSONType type() const { - return (BSONType) *data; - } - - /** returns the tyoe of the element fixed for the main type - the main purpose is numbers. any numeric type will return NumberDouble - Note: if the order changes, indexes have to be re-built or than can be corruption - */ - int canonicalType() const { - BSONType t = type(); - switch ( t ){ - case MinKey: - case MaxKey: - return t; - case EOO: - case Undefined: - return 0; - case jstNULL: - return 5; - case NumberDouble: - case NumberInt: - case NumberLong: - return 10; - case String: - case Symbol: - return 15; - case Object: - return 20; - case Array: - return 25; - case BinData: - return 30; - case jstOID: - return 35; - case Bool: - return 40; - case Date: - case Timestamp: - return 45; - case RegEx: - return 50; - case DBRef: - return 55; - case Code: - return 60; - case CodeWScope: - return 65; - default: - assert(0); - return -1; - } - } - - /** Indicates if it is the end-of-object element, which is present at the end of - every BSON object. - */ - bool eoo() const { - return type() == EOO; - } - - /** Size of the element. - @param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size. - */ - int size( int maxLen = -1 ) const; - - /** Wrap this element up as a singleton object. */ - BSONObj wrap() const; - - /** Wrap this element up as a singleton object with a new name. */ - BSONObj wrap( const char* newName) const; - - /** field name of the element. e.g., for - name : "Joe" - "name" is the fieldname - */ - const char * fieldName() const { - if ( eoo() ) return ""; // no fieldname for it. - return data + 1; - } - - /** raw data of the element's value (so be careful). 
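canonicalType(), shown in the removed header text above (it now lives under bson/), exists so that comparisons and index ordering treat interchangeable storage types as one group: every numeric type maps to 10, String and Symbol to 15, and so on. A sketch of the consequence:

// All numeric storage types share one canonical group:
BSONObj o = BSON( "a" << 3 << "b" << 3.0 << "c" << 3LL );
// o["a"] is NumberInt, o["b"] NumberDouble, o["c"] NumberLong, yet all three
// have canonicalType() == 10, so woCompare() orders them by numeric value.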
*/ - const char * value() const { - return (data + fieldNameSize() + 1); - } - /** size in bytes of the element's value (when applicable). */ - int valuesize() const { - return size() - fieldNameSize() - 1; - } - - bool isBoolean() const { - return type() == Bool; - } - - /** @return value of a boolean element. - You must assure element is a boolean before - calling. */ - bool boolean() const { - return *value() ? true : false; - } - - /** Retrieve a java style date value from the element. - Ensure element is of type Date before calling. - */ - Date_t date() const { - return *reinterpret_cast< const Date_t* >( value() ); - } - - /** Convert the value to boolean, regardless of its type, in a javascript-like fashion - (i.e., treat zero and null as false). - */ - bool trueValue() const { - switch( type() ) { - case NumberLong: - return *reinterpret_cast< const long long* >( value() ) != 0; - case NumberDouble: - return *reinterpret_cast< const double* >( value() ) != 0; - case NumberInt: - return *reinterpret_cast< const int* >( value() ) != 0; - case Bool: - return boolean(); - case EOO: - case jstNULL: - case Undefined: - return false; - - default: - ; - } - return true; - } - - /** True if element is of a numeric type. */ - bool isNumber() const { - switch( type() ) { - case NumberLong: - case NumberDouble: - case NumberInt: - return true; - default: - return false; - } - } - - bool isSimpleType() const { - switch( type() ){ - case NumberLong: - case NumberDouble: - case NumberInt: - case String: - case Bool: - case Date: - case jstOID: - return true; - default: - return false; - } - } - - /** Return double value for this field. MUST be NumberDouble type. */ - double _numberDouble() const {return *reinterpret_cast< const double* >( value() ); } - /** Return double value for this field. MUST be NumberInt type. */ - int _numberInt() const {return *reinterpret_cast< const int* >( value() ); } - /** Return double value for this field. MUST be NumberLong type. */ - long long _numberLong() const {return *reinterpret_cast< const long long* >( value() ); } - - /** Retrieve int value for the element safely. Zero returned if not a number. */ - int numberInt() const { - switch( type() ) { - case NumberDouble: - return (int) _numberDouble(); - case NumberInt: - return _numberInt(); - case NumberLong: - return (int) _numberLong(); - default: - return 0; - } - } - - /** Retrieve long value for the element safely. Zero returned if not a number. */ - long long numberLong() const { - switch( type() ) { - case NumberDouble: - return (long long) _numberDouble(); - case NumberInt: - return _numberInt(); - case NumberLong: - return _numberLong(); - default: - return 0; - } - } - - /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. - NOTE: casts to double, data loss may occur with large (>52 bit) NumberLong values. - */ - double numberDouble() const { - switch( type() ) { - case NumberDouble: - return _numberDouble(); - case NumberInt: - return *reinterpret_cast< const int* >( value() ); - case NumberLong: - return (double) *reinterpret_cast< const long long* >( value() ); - default: - return 0; - } - } - /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. - NOTE: casts to double, data loss may occur with large (>52 bit) NumberLong values. - */ - double number() const { return numberDouble(); } - - /** Retrieve the object ID stored in the object. - You must ensure the element is of type jstOID first. 
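trueValue() above gives BSON values JavaScript-like truthiness, and the numberInt()/numberLong()/numberDouble() accessors coerce across the numeric types instead of asserting. A usage sketch; values are illustrative:

// Truthiness and numeric coercion per the accessors above:
BSONObj o = BSON( "n" << 0 << "s" << "" << "d" << 3.9 );
bool tn = o["n"].trueValue();   // false: numeric zero
bool ts = o["s"].trueValue();   // true: non-numeric, non-null types default to true
int  di = o["d"].numberInt();   // 3: NumberDouble truncated to int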
*/ - const OID &__oid() const { - return *reinterpret_cast< const OID* >( value() ); - } - - /** True if element is null. */ - bool isNull() const { - return type() == jstNULL; - } - - /** Size (length) of a string element. - You must assure of type String first. */ - int valuestrsize() const { - return *reinterpret_cast< const int* >( value() ); - } - - // for objects the size *includes* the size of the size field - int objsize() const { - return *reinterpret_cast< const int* >( value() ); - } - - /** Get a string's value. Also gives you start of the real data for an embedded object. - You must assure data is of an appropriate type first -- see also valuestrsafe(). - */ - const char * valuestr() const { - return value() + 4; - } - - /** Get the string value of the element. If not a string returns "". */ - const char *valuestrsafe() const { - return type() == String ? valuestr() : ""; - } - /** Get the string value of the element. If not a string returns "". */ - string str() const { return valuestrsafe(); } - - /** Get javascript code of a CodeWScope data element. */ - const char * codeWScopeCode() const { - return value() + 8; - } - /** Get the scope SavedContext of a CodeWScope data element. */ - const char * codeWScopeScopeData() const { - // TODO fix - return codeWScopeCode() + strlen( codeWScopeCode() ) + 1; - } - - /** Get the embedded object this element holds. */ - BSONObj embeddedObject() const; - - /* uasserts if not an object */ - BSONObj embeddedObjectUserCheck() const; - - BSONObj codeWScopeObject() const; - - string ascode() const { - switch( type() ){ - case String: - case Code: - return valuestr(); - case CodeWScope: - return codeWScopeCode(); - default: - log() << "can't convert type: " << (int)(type()) << " to code" << endl; - } - uassert( 10062 , "not code" , 0 ); - return ""; - } - - /** Get binary data. Element must be of type BinData */ - const char *binData(int& len) const { - // BinData: - assert( type() == BinData ); - len = valuestrsize(); - return value() + 5; - } - - BinDataType binDataType() const { - // BinData: - assert( type() == BinData ); - unsigned char c = (value() + 4)[0]; - return (BinDataType)c; - } - - /** Retrieve the regex string for a Regex element */ - const char *regex() const { - assert(type() == RegEx); - return value(); - } - - /** Retrieve the regex flags (options) for a Regex element */ - const char *regexFlags() const { - const char *p = regex(); - return p + strlen(p) + 1; - } - - /** like operator== but doesn't check the fieldname, - just the value. - */ - bool valuesEqual(const BSONElement& r) const { - switch( type() ) { - case NumberLong: - return _numberLong() == r.numberLong() && r.isNumber(); - case NumberDouble: - return _numberDouble() == r.number() && r.isNumber(); - case NumberInt: - return _numberInt() == r.numberInt() && r.isNumber(); - default: - ; - } - bool match= valuesize() == r.valuesize() && - memcmp(value(),r.value(),valuesize()) == 0; - return match && canonicalType() == r.canonicalType(); - } - - /** Returns true if elements are equal. */ - bool operator==(const BSONElement& r) const { - if ( strcmp(fieldName(), r.fieldName()) != 0 ) - return false; - return valuesEqual(r); - } - - - /** Well ordered comparison. - @return <0: l0:l>r - order by type, field name, and field value. - If considerFieldName is true, pay attention to the field name. 
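valuesEqual() above compares numeric elements by value rather than by storage type, and operator== layers a field-name check on top. A sketch:

// Cross-type numeric equality:
BSONObj a = BSON( "x" << 1 );            // stored as NumberInt
BSONObj b = BSON( "x" << 1.0 );          // stored as NumberDouble
bool v = a["x"].valuesEqual( b["x"] );   // true: compared as numbers
bool e = ( a["x"] == b["x"] );           // true: same field name and equal values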
*/
-        int woCompare( const BSONElement &e, bool considerFieldName = true ) const;
-
-        const char * rawdata() const {
-            return data;
-        }
-
-        /** 0 == Equality, just not defined yet */
-        int getGtLtOp( int def = 0 ) const;
-
-        /** Constructs an empty element */
-        BSONElement();
-
-        /** Check that data is internally consistent. */
-        void validate() const;
-
-        /** True if this element may contain subobjects. */
-        bool mayEncapsulate() const {
-            switch ( type() ){
-            case Object:
-            case Array:
-            case CodeWScope:
-                return true;
-            default:
-                return false;
-            }
-        }
-
-        /** True if this element can be a BSONObj */
-        bool isABSONObj() const {
-            switch( type() ){
-            case Object:
-            case Array:
-                return true;
-            default:
-                return false;
-            }
-        }
-
-        Date_t timestampTime() const{
-            unsigned long long t = ((unsigned int*)(value() + 4 ))[0];
-            return t * 1000;
-        }
-        unsigned int timestampInc() const{
-            return ((unsigned int*)(value() ))[0];
-        }
-
-        const char * dbrefNS() const {
-            uassert( 10063 , "not a dbref" , type() == DBRef );
-            return value() + 4;
-        }
-
-        const OID& dbrefOID() const {
-            uassert( 10064 , "not a dbref" , type() == DBRef );
-            const char * start = value();
-            start += 4 + *reinterpret_cast< const int* >( start );
-            return *reinterpret_cast< const OID* >( start );
-        }
-
-        bool operator<( const BSONElement& other ) const {
-            int x = (int)canonicalType() - (int)other.canonicalType();
-            if ( x < 0 ) return true;
-            else if ( x > 0 ) return false;
-            return compareElementValues(*this,other) < 0;
-        }
-
-        // If maxLen is specified, don't scan more than maxLen bytes.
-        BSONElement(const char *d, int maxLen = -1) : data(d) {
-            fieldNameSize_ = -1;
-            if ( eoo() )
-                fieldNameSize_ = 0;
-            else {
-                if ( maxLen != -1 ) {
-                    int size = strnlen( fieldName(), maxLen - 1 );
-                    massert( 10333 , "Invalid field name", size != -1 );
-                    fieldNameSize_ = size + 1;
-                }
-            }
-            totalSize = -1;
-        }
-    private:
-        const char *data;
-        mutable int fieldNameSize_; // cached value
-        int fieldNameSize() const {
-            if ( fieldNameSize_ == -1 )
-                fieldNameSize_ = (int)strlen( fieldName() ) + 1;
-            return fieldNameSize_;
-        }
-        mutable int totalSize; /* caches the computed size */
-    };
-
-    int getGtLtOp(const BSONElement& e);
-
-    struct BSONElementCmpWithoutField {
-        bool operator()( const BSONElement &l, const BSONElement &r ) const {
-            return l.woCompare( r, false ) < 0;
-        }
-    };
-
-    typedef set< BSONElement, BSONElementCmpWithoutField > BSONElementSet;
-
-    /**
-       C++ representation of a "BSON" object -- that is, an extended JSON-style
-       object in a binary representation.
-
-       Note that BSONObj's have a smart pointer capability built in -- so you can
-       pass them around by value. The reference counts used to implement this
-       do not use locking, so copying and destroying BSONObj's are not thread-safe
-       operations.
-
-     BSON object format:
-
-     \code
-     <unsigned totalSize> {<byte BSONType><cstring FieldName><Data>}* EOO
-
-     totalSize includes itself.
-
-     Data:
-     Bool:      <byte>
-     EOO:       nothing follows
-     Undefined: nothing follows
-     OID:       an OID object
-     NumberDouble: <double>
-     NumberInt: <int32>
-     String:    <unsigned32 strsizewithnull><cstring>
-     Date:      <8bytes>
-     Regex:     <cstring regex><cstring options>
-     Object:    a nested object, leading with its entire size, which terminates with EOO.
-     Array:     same as object
-     DBRef:     <strlen> <cstring ns> <oid>
-     DBRef:     a database reference: basically a collection name plus an Object ID
-     BinData:   <int len> <byte subtype> <byte[len] data>
-     Code:      a function (not a closure): same format as String.
-     Symbol:    a language symbol (say a python symbol).  same format as String.
-     Code With Scope: <total size><String><Object>
-     \endcode
-     */
-    class BSONObj {
-        friend class BSONObjIterator;
-        class Holder {
-        public:
-            Holder( const char *objdata ) :
-            _objdata( objdata ) {
-            }
-            ~Holder() {
-                free((void *)_objdata);
-                _objdata = 0;
-            }
-        private:
-            const char *_objdata;
-        };
-        const char *_objdata;
-        boost::shared_ptr< Holder > _holder;
-        void init(const char *data, bool ifree) {
-            if ( ifree )
-                _holder.reset( new Holder( data ) );
-            _objdata = data;
-            if ( ! isValid() ){
-                stringstream ss;
-                ss << "Invalid BSONObj spec size: " << objsize();
-                try {
-                    BSONElement e = firstElement();
-                    ss << " first element:" << e.toString() << " ";
-                }
-                catch ( ... ){}
-                string s = ss.str();
-                massert( 10334 , s , 0 );
-            }
-        }
-#pragma pack(1)
-        static struct EmptyObject {
-            EmptyObject() {
-                len = 5;
-                jstype = EOO;
-            }
-            int len;
-            char jstype;
-        } emptyObject;
-#pragma pack()
-    public:
-        /** Construct a BSONObj from data in the proper format.
-            @param ifree true if the BSONObj should free() the msgdata when
-            it destructs.
-        */
-        explicit BSONObj(const char *msgdata, bool ifree = false) {
-            init(msgdata, ifree);
-        }
-        BSONObj(const Record *r);
-        /** Construct an empty BSONObj -- that is, {}. */
-        BSONObj() : _objdata( reinterpret_cast< const char * >( &emptyObject ) ) { }
-        // defensive
-        ~BSONObj() { _objdata = 0; }
-
-        void appendSelfToBufBuilder(BufBuilder& b) const {
-            assert( objsize() );
-            b.append(reinterpret_cast<const char *>( objdata() ), objsize());
-        }
-
-        /** Readable representation of a BSON object in an extended JSON-style notation.
-            This is an abbreviated representation which might be used for logging.
-        */
-        string toString() const;
-        operator string() const { return toString(); }
-
-        /** Properly formatted JSON string. */
-        string jsonString( JsonStringFormat format = Strict ) const;
-
-        /** note: addFields always adds _id even if not specified */
-        int addFields(BSONObj& from, set<string>& fields); /* returns n added */
-
-        /** returns # of top level fields in the object
-            note: iterates to count the fields
-        */
-        int nFields() const;
-
-        /** adds the field names to the fields set. does NOT clear it (appends). */
-        int getFieldNames(set<string>& fields) const;
-
-        /** return has eoo() true if no match
-            supports "." notation to reach into embedded objects
-        */
-        BSONElement getFieldDotted(const char *name) const;
-        /** Like getFieldDotted(), but expands multikey arrays and returns all matching objects
-        */
-        void getFieldsDotted(const char *name, BSONElementSet &ret ) const;
-        /** Like getFieldDotted(), but returns first array encountered while traversing the
-            dotted fields of name. The name variable is updated to represent field
-            names with respect to the returned element. */
-        BSONElement getFieldDottedOrArray(const char *&name) const;
-
-        /** Get the field of the specified name. eoo() is true on the returned
-            element if not found.
-        */
-        BSONElement getField(const char *name) const;
-
-        /** Get the field of the specified name. eoo() is true on the returned
-            element if not found.
-        */
-        BSONElement getField(const string name) const {
-            return getField( name.c_str() );
-        };
-
-        /** Get the field of the specified name. eoo() is true on the returned
-            element if not found.
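            A lookup sketch (illustrative, for some BSONObj obj):
              BSONElement e = obj["age"];
              if ( !e.eoo() )
                  cout << e.Number() << endl;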
*/
-        BSONElement operator[] (const char *field) const {
-            return getField(field);
-        }
-
-        BSONElement operator[] (const string& field) const {
-            return getField(field);
-        }
-
-        BSONElement operator[] (int field) const {
-            stringstream ss;
-            ss << field;
-            string s = ss.str();
-            return getField(s.c_str());
-        }
-
-        /** @return true if field exists */
-        bool hasField( const char * name )const {
-            return ! getField( name ).eoo();
-        }
-
-        /** @return "" if DNE or wrong type */
-        const char * getStringField(const char *name) const;
-
-        /** @return subobject of the given name */
-        BSONObj getObjectField(const char *name) const;
-
-        /** @return INT_MIN if not present - does some type conversions */
-        int getIntField(const char *name) const;
-
-        /** @return false if not present */
-        bool getBoolField(const char *name) const;
-
-        /** makes a new BSONObj with the fields specified in pattern.
-            fields returned in the order they appear in pattern.
-            if any field is missing or undefined in the object, that field in the
-            output will be null.
-
-            sets output field names to match pattern field names.
-            If an array is encountered while scanning the dotted names in pattern,
-            that field is treated as missing.
-        */
-        BSONObj extractFieldsDotted(BSONObj pattern) const;
-
-        /**
-           sets element field names to empty string
-           If a field in pattern is missing, it is omitted from the returned
-           object.
-        */
-        BSONObj extractFieldsUnDotted(BSONObj pattern) const;
-
-        /** extract items from object which match a pattern object.
-            e.g., if pattern is { x : 1, y : 1 }, builds an object with
-            x and y elements of this object, if they are present.
-            returns elements with original field names
-        */
-        BSONObj extractFields(const BSONObj &pattern , bool fillWithNull=false) const;
-
-        BSONObj filterFieldsUndotted(const BSONObj &filter, bool inFilter) const;
-
-        BSONElement getFieldUsingIndexNames(const char *fieldName, const BSONObj &indexKey) const;
-
-        /** @return the raw data of the object */
-        const char *objdata() const {
-            return _objdata;
-        }
-        /** @return total size of the BSON object in bytes */
-        int objsize() const {
-            return *(reinterpret_cast<const int*>(objdata()));
-        }
-
-        bool isValid();
-
-        /** @return if the user is a valid user doc
-            criteria: isValid() plus no '.' or '$' in field names
-        */
-        bool okForStorage() const;
-
-        /** @return true if object is empty -- i.e., {} */
-        bool isEmpty() const {
-            return objsize() <= 5;
-        }
-
-        void dump() const {
-            out() << hex;
-            const char *p = objdata();
-            for ( int i = 0; i < objsize(); i++ ) {
-                out() << i << '\t' << ( 0xff & ( (unsigned) *p ) );
-                if ( *p >= 'A' && *p <= 'z' )
-                    out() << '\t' << *p;
-                out() << endl;
-                p++;
-            }
-        }
-
-        // Alternative output format
-        string hexDump() const;
-
-        /**wo='well ordered'. fields must be in same order in each object.
-           Ordering is with respect to the signs of the elements in idxKey.
-           @return <0 if l<r. 0 if l==r. >0 if l>r
-        */
-        int woCompare(const BSONObj& r, const BSONObj &idxKey = BSONObj(),
-                      bool considerFieldName=true) const;
-
-        int woSortOrder( const BSONObj& r , const BSONObj& sortKey ) const;
-
-        /** This is "shallow equality" -- ints and doubles won't match. for a
-            deep equality test use woCompare (which is slower).
-        */
-        bool woEqual(const BSONObj& r) const {
-            int os = objsize();
-            if ( os == r.objsize() ) {
-                return (os == 0 || memcmp(objdata(),r.objdata(),os)==0);
-            }
-            return false;
-        }
-
-        /** @return first field of the object */
-        BSONElement firstElement() const {
-            return BSONElement(objdata() + 4);
-        }
-
-        /** use getField() instead.
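            (The commented-out declarations below are superseded by operator[] above,
            which forwards to getField(); e.g. obj["name"] and obj.getField("name")
            return the same element -- illustrative, for some BSONObj obj.)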
*/ - //BSONElement getField(const char *name) const; - //BSONElement getField(string name) const { - - /** @return true if field exists in the object */ - bool hasElement(const char *name) const; - - /** Get the _id field from the object. For good performance drivers should - assure that _id is the first element of the object; however, correct operation - is assured regardless. - @return true if found - */ - bool getObjectID(BSONElement& e) const; - - /** makes a copy of the object. - */ - BSONObj copy() const; - - /* make sure the data buffer is under the control of BSONObj's and not a remote buffer */ - BSONObj getOwned() const{ - if ( !isOwned() ) - return copy(); - return *this; - } - bool isOwned() const { return _holder.get() != 0; } - - /** @return A hash code for the object */ - int hash() const { - unsigned x = 0; - const char *p = objdata(); - for ( int i = 0; i < objsize(); i++ ) - x = x * 131 + p[i]; - return (x & 0x7fffffff) | 0x8000000; // must be > 0 - } - - // Return a version of this object where top level elements of types - // that are not part of the bson wire protocol are replaced with - // string identifier equivalents. - // TODO Support conversion of element types other than min and max. - BSONObj clientReadable() const; - - /** Return new object with the field names replaced by those in the - passed object. */ - BSONObj replaceFieldNames( const BSONObj &obj ) const; - - /** true unless corrupt */ - bool valid() const; - - string md5() const; - - bool operator==( const BSONObj& other ){ - return woCompare( other ) == 0; - } - - enum MatchType { - Equality = 0, - LT = 0x1, - LTE = 0x3, - GTE = 0x6, - GT = 0x4, - opIN = 0x8, // { x : { $in : [1,2,3] } } - NE = 0x9, - opSIZE = 0x0A, - opALL = 0x0B, - NIN = 0x0C, - opEXISTS = 0x0D, - opMOD = 0x0E, - opTYPE = 0x0F, - opREGEX = 0x10, - opOPTIONS = 0x11, - opELEM_MATCH = 0x12, - opNEAR = 0x13, - opWITHIN = 0x14, - }; - }; - ostream& operator<<( ostream &s, const BSONObj &o ); - ostream& operator<<( ostream &s, const BSONElement &e ); - - struct BSONArray: BSONObj { - // Don't add anything other than forwarding constructors!!! - BSONArray(): BSONObj() {} - explicit BSONArray(const BSONObj& obj): BSONObj(obj) {} - }; - - class BSONObjCmp { - public: - BSONObjCmp( const BSONObj &_order = BSONObj() ) : order( _order ) {} - bool operator()( const BSONObj &l, const BSONObj &r ) const { - return l.woCompare( r, order ) < 0; - } - private: - BSONObj order; - }; - - class BSONObjCmpDefaultOrder : public BSONObjCmp { - public: - BSONObjCmpDefaultOrder() : BSONObjCmp( BSONObj() ) {} - }; - - typedef set< BSONObj, BSONObjCmpDefaultOrder > BSONObjSetDefaultOrder; - - enum FieldCompareResult { - LEFT_SUBFIELD = -2, - LEFT_BEFORE = -1, - SAME = 0, - RIGHT_BEFORE = 1 , - RIGHT_SUBFIELD = 2 - }; - - FieldCompareResult compareDottedFieldNames( const string& l , const string& r ); - -/** Use BSON macro to build a BSONObj from a stream - - e.g., - BSON( "name" << "joe" << "age" << 33 ) - - with auto-generated object id: - BSON( GENOID << "name" << "joe" << "age" << 33 ) - - The labels GT, GTE, LT, LTE, NE can be helpful for stream-oriented construction - of a BSONObj, particularly when assembling a Query. For example, - BSON( "a" << GT << 23.4 << NE << 30 << "b" << 2 ) produces the object - { a: { \$gt: 23.4, \$ne: 30 }, b: 2 }. 
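    A second sketch in the same spirit:
      BSON( "age" << GT << 18 << LTE << 65 )
    produces the object { age: { \$gt: 18, \$lte: 65 } }.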
-*/
-#define BSON(x) (( mongo::BSONObjBuilder(64) << x ).obj())
-
-/** Use BSON_ARRAY macro like BSON macro, but without keys
-
-    BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) );
-
- */
-#define BSON_ARRAY(x) (( mongo::BSONArrayBuilder() << x ).arr())
-
-    /* Utility class to auto assign object IDs.
-       Example:
-         cout << BSON( GENOID << "z" << 3 ); // { _id : ..., z : 3 }
-    */
-    extern struct IDLabeler { } GENOID;
-
-    /* Utility class to add a Date element with the current time
-       Example:
-         cout << BSON( "created" << DATENOW ); // { created : "2009-10-09 11:41:42" }
-    */
-    extern struct DateNowLabeler { } DATENOW;
-
-    // Utility class to implement GT, GTE, etc as described above.
-    class Labeler {
-    public:
-        struct Label {
-            Label( const char *l ) : l_( l ) {}
-            const char *l_;
-        };
-        Labeler( const Label &l, BSONObjBuilderValueStream *s ) : l_( l ), s_( s ) {}
-        template<class T>
-        BSONObjBuilder& operator<<( T value );
-
-        /* the value of the element e is appended i.e. for
-             "age" << GT << someElement
-           one gets
-             { age : { $gt : someElement's value } }
-        */
-        BSONObjBuilder& operator<<( const BSONElement& e );
-    private:
-        const Label &l_;
-        BSONObjBuilderValueStream *s_;
-    };
-
-    extern Labeler::Label GT;
-    extern Labeler::Label GTE;
-    extern Labeler::Label LT;
-    extern Labeler::Label LTE;
-    extern Labeler::Label NE;
-    extern Labeler::Label SIZE;
-
-    // Utility class to implement BSON( key << val ) as described above.
-    class BSONObjBuilderValueStream : public boost::noncopyable {
-    public:
-        friend class Labeler;
-        BSONObjBuilderValueStream( BSONObjBuilder * builder );
-
-        BSONObjBuilder& operator<<( const BSONElement& e );
-
-        template<class T>
-        BSONObjBuilder& operator<<( T value );
-
-        BSONObjBuilder& operator<<(DateNowLabeler& id);
-
-        Labeler operator<<( const Labeler::Label &l );
-
-        void endField( const char *nextFieldName = 0 );
-        bool subobjStarted() const { return _fieldName != 0; }
-
-    private:
-        const char * _fieldName;
-        BSONObjBuilder * _builder;
-
-        bool haveSubobj() const { return _subobj.get() != 0; }
-        BSONObjBuilder *subobj();
-        auto_ptr< BSONObjBuilder > _subobj;
-    };
-
-    /**
-       used in conjunction with BSONObjBuilder, allows for proper buffer size to prevent crazy memory usage
-     */
-    class BSONSizeTracker {
-    public:
-#define BSONSizeTrackerSize 10
-
-        BSONSizeTracker(){
-            _pos = 0;
-            for ( int i=0; i<BSONSizeTrackerSize; i++ )
-                _sizes[i] = 512; // this is the default, so just be consistent
-        }
-
-        ~BSONSizeTracker(){
-        }
-
-        void got( int size ){
-            _sizes[_pos++] = size;
-            if ( _pos >= BSONSizeTrackerSize )
-                _pos = 0;
-        }
-
-        /**
-         * right now choosing largest size
-         */
-        int getSize() const {
-            int x = 16; // sane min
-            for ( int i=0; i<BSONSizeTrackerSize; i++ ){
-                if ( _sizes[i] > x )
-                    x = _sizes[i];
-            }
-            return x;
-        }
-
-    private:
-        int _pos;
-        int _sizes[BSONSizeTrackerSize];
-    };
-
-    /**
-       utility for creating a BSONObj
-     */
-    class BSONObjBuilder : boost::noncopyable {
-    public:
-        /** @param initsize this is just a hint as to the final size of the object */
-        BSONObjBuilder(int initsize=512) : b(buf_), buf_(initsize), offset_( 0 ), s_( this ) , _tracker(0) {
-            b.skip(4); /*leave room for size field*/
-        }
-
-        /** @param baseBuilder construct a BSONObjBuilder using an existing BufBuilder */
-        BSONObjBuilder( BufBuilder &baseBuilder ) : b( baseBuilder ), buf_( 0 ), offset_( baseBuilder.len() ), s_( this ) , _tracker(0) {
-            b.skip( 4 );
-        }
-
-        BSONObjBuilder( const BSONSizeTracker & tracker ) : b(buf_) , buf_(tracker.getSize() ), offset_(0), s_( this ) , _tracker( (BSONSizeTracker*)(&tracker) ){
-            b.skip( 4 );
-        }
-
-        /** add all the fields from the object specified to this object */
-        BSONObjBuilder& appendElements(BSONObj x);
-
-        /** append element to
the object we are building */ - void append( const BSONElement& e) { - assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. - b.append((void*) e.rawdata(), e.size()); - } - - /** append an element but with a new name */ - void appendAs(const BSONElement& e, const char *as) { - assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. - b.append((char) e.type()); - b.append(as); - b.append((void *) e.value(), e.valuesize()); - } - - void appendAs(const BSONElement& e, const string& as) { - appendAs( e , as.c_str() ); - } - - - /** add a subobject as a member */ - void append(const char *fieldName, BSONObj subObj) { - b.append((char) Object); - b.append(fieldName); - b.append((void *) subObj.objdata(), subObj.objsize()); - } - - void append(const string& fieldName , BSONObj subObj) { - append( fieldName.c_str() , subObj ); - } - - /** add header for a new subobject and return bufbuilder for writing to - the subobject's body */ - BufBuilder &subobjStart(const char *fieldName) { - b.append((char) Object); - b.append(fieldName); - return b; - } - - /** add a subobject as a member with type Array. Thus arr object should have "0", "1", ... - style fields in it. - */ - void appendArray(const char *fieldName, BSONObj subObj) { - b.append((char) Array); - b.append(fieldName); - b.append((void *) subObj.objdata(), subObj.objsize()); - } - void append(const char *fieldName, BSONArray arr) { appendArray(fieldName, arr); } - - - /** add header for a new subarray and return bufbuilder for writing to - the subarray's body */ - BufBuilder &subarrayStart(const char *fieldName) { - b.append((char) Array); - b.append(fieldName); - return b; - } - - /** Append a boolean element */ - void appendBool(const char *fieldName, int val) { - b.append((char) Bool); - b.append(fieldName); - b.append((char) (val?1:0)); - } - - /** Append a boolean element */ - void append(const char *fieldName, bool val) { - b.append((char) Bool); - b.append(fieldName); - b.append((char) (val?1:0)); - } - - /** Append a 32 bit integer element */ - void append(const char *fieldName, int n) { - b.append((char) NumberInt); - b.append(fieldName); - b.append(n); - } - /** Append a 32 bit integer element */ - void append(const string &fieldName, int n) { - append( fieldName.c_str(), n ); - } - - /** Append a 32 bit unsigned element - cast to a signed int. */ - void append(const char *fieldName, unsigned n) { append(fieldName, (int) n); } - - /** Append a NumberLong */ - void append(const char *fieldName, long long n) { - b.append((char) NumberLong); - b.append(fieldName); - b.append(n); - } - - /** Append a NumberLong */ - void append(const string& fieldName, long long n) { - append( fieldName.c_str() , n ); - } - - /** appends a number. 
if n < max(int)/2 then uses int, otherwise long long */
-        void appendIntOrLL( const string& fieldName , long long n ){
-            long long x = n;
-            if ( x < 0 )
-                x = x * -1;
-            if ( x < ( numeric_limits<int>::max() / 2 ) )
-                append( fieldName.c_str() , (int)n );
-            else
-                append( fieldName.c_str() , n );
-        }
-
-
-        /**
-         * appendNumber is a series of methods for appending the smallest sensible type
-         * mostly for JS
-         */
-        void appendNumber( const string& fieldName , int n ){
-            append( fieldName.c_str() , n );
-        }
-
-        void appendNumber( const string& fieldName , double d ){
-            append( fieldName.c_str() , d );
-        }
-
-        void appendNumber( const string& fieldName , long long l ){
-            static long long maxInt = (int)pow( 2.0 , 30.0 );
-            static long long maxDouble = (long long)pow( 2.0 , 40.0 );
-
-            if ( l < maxInt )
-                append( fieldName.c_str() , (int)l );
-            else if ( l < maxDouble )
-                append( fieldName.c_str() , (double)l );
-            else
-                append( fieldName.c_str() , l );
-        }
-
-        /** Append a double element */
-        BSONObjBuilder& append(const char *fieldName, double n) {
-            b.append((char) NumberDouble);
-            b.append(fieldName);
-            b.append(n);
-            return *this;
-        }
-
-        /** tries to append the data as a number
-         * @return true if the data was able to be converted to a number
-         */
-        bool appendAsNumber( const string& fieldName , const string& data );
-
-        /** Append a BSON Object ID (OID type). */
-        void appendOID(const char *fieldName, OID *oid = 0 , bool generateIfBlank = false ) {
-            b.append((char) jstOID);
-            b.append(fieldName);
-            if ( oid )
-                b.append( (void *) oid, 12 );
-            else {
-                OID tmp;
-                if ( generateIfBlank )
-                    tmp.init();
-                else
-                    tmp.clear();
-                b.append( (void *) &tmp, 12 );
-            }
-        }
-        void append( const char *fieldName, OID oid ) {
-            appendOID( fieldName, &oid );
-        }
-        /** Append a time_t date.
-            @param dt a C-style 32 bit date value, that is
-            the number of seconds since January 1, 1970, 00:00:00 GMT
-        */
-        void appendTimeT(const char *fieldName, time_t dt) {
-            b.append((char) Date);
-            b.append(fieldName);
-            b.append(static_cast<unsigned long long>(dt) * 1000);
-        }
-        /** Append a date.
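            For instance (sketch): appendDate( "created" , jsTime() ) stores the
            current time, jsTime() being the millisecond clock this codebase also
            uses for DATENOW.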
- @param dt a Java-style 64 bit date value, that is - the number of milliseconds since January 1, 1970, 00:00:00 GMT - */ - void appendDate(const char *fieldName, Date_t dt) { - b.append((char) Date); - b.append(fieldName); - b.append(dt); - } - void append(const char *fieldName, Date_t dt) { - appendDate(fieldName, dt); - } - - /** Append a regular expression value - @param regex the regular expression pattern - @param regex options such as "i" or "g" - */ - void appendRegex(const char *fieldName, const char *regex, const char *options = "") { - b.append((char) RegEx); - b.append(fieldName); - b.append(regex); - b.append(options); - } - /** Append a regular expression value - @param regex the regular expression pattern - @param regex options such as "i" or "g" - */ - void appendRegex(string fieldName, string regex, string options = "") { - appendRegex(fieldName.c_str(), regex.c_str(), options.c_str()); - } - void appendCode(const char *fieldName, const char *code) { - b.append((char) Code); - b.append(fieldName); - b.append((int) strlen(code)+1); - b.append(code); - } - /** Append a string element */ - BSONObjBuilder& append(const char *fieldName, const char *str) { - b.append((char) String); - b.append(fieldName); - b.append((int) strlen(str)+1); - b.append(str); - return *this; - } - /** Append a string element */ - void append(const char *fieldName, string str) { - append(fieldName, str.c_str()); - } - void appendSymbol(const char *fieldName, const char *symbol) { - b.append((char) Symbol); - b.append(fieldName); - b.append((int) strlen(symbol)+1); - b.append(symbol); - } - - /** Append a Null element to the object */ - void appendNull( const char *fieldName ) { - b.append( (char) jstNULL ); - b.append( fieldName ); - } - - // Append an element that is less than all other keys. - void appendMinKey( const char *fieldName ) { - b.append( (char) MinKey ); - b.append( fieldName ); - } - // Append an element that is greater than all other keys. - void appendMaxKey( const char *fieldName ) { - b.append( (char) MaxKey ); - b.append( fieldName ); - } - - // Append a Timestamp field -- will be updated to next OpTime on db insert. - void appendTimestamp( const char *fieldName ) { - b.append( (char) Timestamp ); - b.append( fieldName ); - b.append( (unsigned long long) 0 ); - } - - void appendTimestamp( const char *fieldName , unsigned long long val ) { - b.append( (char) Timestamp ); - b.append( fieldName ); - b.append( val ); - } - - /** - * @param time - in millis (but stored in seconds) - */ - void appendTimestamp( const char *fieldName , unsigned long long time , unsigned int inc ){ - OpTime t( (unsigned) (time / 1000) , inc ); - appendTimestamp( fieldName , t.asDate() ); - } - - /* Deprecated (but supported) */ - void appendDBRef( const char *fieldName, const char *ns, const OID &oid ) { - b.append( (char) DBRef ); - b.append( fieldName ); - b.append( (int) strlen( ns ) + 1 ); - b.append( ns ); - b.append( (void *) &oid, 12 ); - } - - /** Append a binary data element - @param fieldName name of the field - @param len length of the binary data in bytes - @param type type information for the data. @see BinDataType. Use ByteArray if you - don't care about the type. 
- @param data the byte array - */ - void appendBinData( const char *fieldName, int len, BinDataType type, const char *data ) { - b.append( (char) BinData ); - b.append( fieldName ); - b.append( len ); - b.append( (char) type ); - b.append( (void *) data, len ); - } - void appendBinData( const char *fieldName, int len, BinDataType type, const unsigned char *data ) { - appendBinData(fieldName, len, type, (const char *) data); - } - - /** - @param len the length of data - */ - void appendBinDataArray( const char * fieldName , const char * data , int len ){ - b.append( (char) BinData ); - b.append( fieldName ); - b.append( len + 4 ); - b.append( (char)0x2 ); - b.append( len ); - b.append( (void *) data, len ); - } - - /** Append to the BSON object a field of type CodeWScope. This is a javascript code - fragment accompanied by some scope that goes with it. - */ - void appendCodeWScope( const char *fieldName, const char *code, const BSONObj &scope ) { - b.append( (char) CodeWScope ); - b.append( fieldName ); - b.append( ( int )( 4 + 4 + strlen( code ) + 1 + scope.objsize() ) ); - b.append( ( int ) strlen( code ) + 1 ); - b.append( code ); - b.append( ( void * )scope.objdata(), scope.objsize() ); - } - - void appendUndefined( const char *fieldName ) { - b.append( (char) Undefined ); - b.append( fieldName ); - } - - /* helper function -- see Query::where() for primary way to do this. */ - void appendWhere( const char *code, const BSONObj &scope ){ - appendCodeWScope( "$where" , code , scope ); - } - void appendWhere( const string &code, const BSONObj &scope ){ - appendWhere( code.c_str(), scope ); - } - - /** - these are the min/max when comparing, not strict min/max elements for a given type - */ - void appendMinForType( const string& field , int type ); - void appendMaxForType( const string& field , int type ); - - /** Append an array of values. */ - template < class T > - void append( const char *fieldName, const vector< T >& vals ) { - BSONObjBuilder arrBuilder; - for ( unsigned int i = 0; i < vals.size(); ++i ) - arrBuilder.append( numStr( i ).c_str(), vals[ i ] ); - marshalArray( fieldName, arrBuilder.done() ); - } - - /* Append an array of ints - void appendArray( const char *fieldName, const vector< int >& vals ) { - BSONObjBuilder arrBuilder; - for ( unsigned i = 0; i < vals.size(); ++i ) - arrBuilder.append( numStr( i ).c_str(), vals[ i ] ); - marshalArray( fieldName, arrBuilder.done() ); - }*/ - - /** The returned BSONObj will free the buffer when it is finished. */ - BSONObj obj() { - massert( 10335 , "builder does not own memory", owned() ); - int l; - return BSONObj(decouple(l), true); - } - - /** Fetch the object we have built. - BSONObjBuilder still frees the object when the builder goes out of - scope -- very important to keep in mind. Use obj() if you - would like the BSONObj to last longer than the builder. - */ - BSONObj done() { - return BSONObj(_done()); - } - - /** Peek at what is in the builder, but leave the builder ready for more appends. - The returned object is only valid until the next modification or destruction of the builder. - Intended use case: append a field if not already there. - */ - BSONObj asTempObj() { - BSONObj temp(_done()); - b.setlen(b.len()-1); //next append should overwrite the EOO - return temp; - } - - /* assume ownership of the buffer - you must then free it (with free()) */ - char* decouple(int& l) { - char *x = _done(); - assert( x ); - l = b.len(); - b.decouple(); - return x; - } - void decouple() { - b.decouple(); // post done() call version. 
be sure jsobj frees...
-        }
-
-        void appendKeys( const BSONObj& keyPattern , const BSONObj& values );
-
-    private:
-        static const string numStrs[100]; // cache of 0 to 99 inclusive
-    public:
-        static string numStr( int i ) {
-            if (i>=0 && i<100)
-                return numStrs[i];
-
-            stringstream o;
-            o << i;
-            return o.str();
-        }
-
-        /** Stream oriented way to add field names and values. */
-        BSONObjBuilderValueStream &operator<<(const char * name ) {
-            s_.endField( name );
-            return s_;
-        }
-
-        /** Stream oriented way to add field names and values. */
-        BSONObjBuilder& operator<<( IDLabeler ) {
-            OID oid;
-            oid.init();
-            appendOID("_id", &oid);
-            return *this;
-        }
-
-        // prevent implicit string conversions which would allow bad things like BSON( BSON( "foo" << 1 ) << 2 )
-        struct ForceExplicitString {
-            ForceExplicitString( const string &str ) : str_( str ) {}
-            string str_;
-        };
-
-        /** Stream oriented way to add field names and values. */
-        BSONObjBuilderValueStream &operator<<( const ForceExplicitString& name ) {
-            return operator<<( name.str_.c_str() );
-        }
-
-        Labeler operator<<( const Labeler::Label &l ) {
-            massert( 10336 , "No subobject started", s_.subobjStarted() );
-            return s_ << l;
-        }
-
-        bool owned() const {
-            return &b == &buf_;
-        }
-
-    private:
-        // Append the provided arr object as an array.
-        void marshalArray( const char *fieldName, const BSONObj &arr ) {
-            b.append( (char) Array );
-            b.append( fieldName );
-            b.append( (void *) arr.objdata(), arr.objsize() );
-        }
-
-        char* _done() {
-            s_.endField();
-            b.append((char) EOO);
-            char *data = b.buf() + offset_;
-            int size = b.len() - offset_;
-            *((int*)data) = size;
-            if ( _tracker )
-                _tracker->got( size );
-            return data;
-        }
-
-        BufBuilder &b;
-        BufBuilder buf_;
-        int offset_;
-        BSONObjBuilderValueStream s_;
-        BSONSizeTracker * _tracker;
-    };
-
-    class BSONArrayBuilder : boost::noncopyable{
-    public:
-        BSONArrayBuilder() : _i(0), _b() {}
-        BSONArrayBuilder( BufBuilder &b ) : _i(0), _b(b) {}
-
-        template <typename T>
-        BSONArrayBuilder& append(const T& x){
-            _b.append(num().c_str(), x);
-            return *this;
-        }
-
-        BSONArrayBuilder& append(const BSONElement& e){
-            _b.appendAs(e, num().c_str());
-            return *this;
-        }
-
-        template <typename T>
-        BSONArrayBuilder& operator<<(const T& x){
-            return append(x);
-        }
-
-        void appendNull() {
-            _b.appendNull(num().c_str());
-        }
-
-        BSONArray arr(){ return BSONArray(_b.obj()); }
-
-        BSONObj done() { return _b.done(); }
-
-        template <typename T>
-        BSONArrayBuilder& append(const char *name, const T& x){
-            fill( name );
-            append( x );
-            return *this;
-        }
-
-        BufBuilder &subobjStart( const char *name ) {
-            fill( name );
-            return _b.subobjStart( num().c_str() );
-        }
-
-        BufBuilder &subarrayStart( const char *name ) {
-            fill( name );
-            return _b.subarrayStart( num().c_str() );
-        }
-
-        void appendArray( const char *name, BSONObj subObj ) {
-            fill( name );
-            _b.appendArray( num().c_str(), subObj );
-        }
-
-        void appendAs( const BSONElement &e, const char *name ) {
-            fill( name );
-            append( e );
-        }
-
-    private:
-        void fill( const char *name ) {
-            char *r;
-            int n = strtol( name, &r, 10 );
-            uassert( 13048, "can't append to array using string field name", !*r );
-            while( _i < n )
-                append( nullElt() );
-        }
-
-        static BSONElement nullElt() {
-            static BSONObj n = nullObj();
-            return n.firstElement();
-        }
-
-        static BSONObj nullObj() {
-            BSONObjBuilder b;
-            b.appendNull( "" );
-            return b.obj();
-        }
-
-        string num(){ return _b.numStr(_i++); }
-        int _i;
-        BSONObjBuilder _b;
-    };
-
-
-    /** iterator for a BSONObj
-
-        Note each BSONObj ends with an EOO element:
so you will get more() on an empty - object, although next().eoo() will be true. - - todo: we may want to make a more stl-like iterator interface for this - with things like begin() and end() - */ - class BSONObjIterator { - public: - /** Create an iterator for a BSON object. - */ - BSONObjIterator(const BSONObj& jso) { - int sz = jso.objsize(); - if ( sz == 0 ) { - pos = theend = 0; - return; - } - pos = jso.objdata() + 4; - theend = jso.objdata() + sz; - } - /** @return true if more elements exist to be enumerated. */ - bool moreWithEOO() { - return pos < theend; - } - bool more(){ - return pos < theend && pos[0]; - } - /** @return the next element in the object. For the final element, element.eoo() will be true. */ - BSONElement next( bool checkEnd = false ) { - assert( pos < theend ); - BSONElement e( pos, checkEnd ? (int)(theend - pos) : -1 ); - pos += e.size( checkEnd ? (int)(theend - pos) : -1 ); - return e; - } - private: - const char *pos; - const char *theend; - }; - - /* iterator a BSONObj which is an array, in array order. - class JSArrayIter { - public: - BSONObjIterator(const BSONObj& jso) { - ... - } - bool more() { return ... } - BSONElement next() { - ... - } - }; - */ - - extern BSONObj maxKey; - extern BSONObj minKey; - - // a BoundList contains intervals specified by inclusive start - // and end bounds. The intervals should be nonoverlapping and occur in - // the specified direction of traversal. For example, given a simple index {i:1} - // and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList - // would be valid for index {i:-1} with direction -1. - typedef vector< pair< BSONObj, BSONObj > > BoundList; - - /*- just for testing -- */ - -#pragma pack(1) - struct JSObj1 { - JSObj1() { - totsize=sizeof(JSObj1); - n = NumberDouble; - strcpy_s(nname, 5, "abcd"); - N = 3.1; - s = String; - strcpy_s(sname, 7, "abcdef"); - slen = 10; - strcpy_s(sval, 10, "123456789"); - eoo = EOO; - } - unsigned totsize; - - char n; - char nname[5]; - double N; - - char s; - char sname[7]; - unsigned slen; - char sval[10]; - - char eoo; - }; -#pragma pack() - extern JSObj1 js1; - -#ifdef _DEBUG -#define CHECK_OBJECT( o , msg ) massert( 10337 , (string)"object not valid" + (msg) , (o).isValid() ) -#else -#define CHECK_OBJECT( o , msg ) -#endif - - inline BSONObj BSONElement::embeddedObjectUserCheck() const { - uassert( 10065 , "invalid parameter: expected an object", isABSONObj() ); - return BSONObj(value()); - } - - inline BSONObj BSONElement::embeddedObject() const { - assert( isABSONObj() ); - return BSONObj(value()); - } - - inline BSONObj BSONElement::codeWScopeObject() const { - assert( type() == CodeWScope ); - int strSizeWNull = *(int *)( value() + 4 ); - return BSONObj( value() + 4 + 4 + strSizeWNull ); - } - - inline BSONObj BSONObj::copy() const { - char *p = (char*) malloc(objsize()); - memcpy(p, objdata(), objsize()); - return BSONObj(p, true); - } - -// wrap this element up as a singleton object. 
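    // e.g. (sketch): BSON( "x" << 1 ).firstElement().wrap() yields { x: 1 },
    // and wrap( "y" ) yields { y: 1 } -- the same value under a new field name.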
-    inline BSONObj BSONElement::wrap() const {
-        BSONObjBuilder b(size()+6);
-        b.append(*this);
-        return b.obj();
-    }
-
-    inline BSONObj BSONElement::wrap( const char * newName ) const {
-        BSONObjBuilder b(size()+6+strlen(newName));
-        b.appendAs(*this,newName);
-        return b.obj();
-    }
-
-
-    inline bool BSONObj::hasElement(const char *name) const {
-        if ( !isEmpty() ) {
-            BSONObjIterator it(*this);
-            while ( it.moreWithEOO() ) {
-                BSONElement e = it.next();
-                if ( strcmp(name, e.fieldName()) == 0 )
-                    return true;
-            }
-        }
-        return false;
-    }
-
-    inline BSONElement BSONObj::getField(const char *name) const {
-        BSONObjIterator i(*this);
-        while ( i.more() ) {
-            BSONElement e = i.next();
-            if ( strcmp(e.fieldName(), name) == 0 )
-                return e;
-        }
-        return BSONElement();
-    }
-
-    /* add all the fields from the object specified to this object */
-    inline BSONObjBuilder& BSONObjBuilder::appendElements(BSONObj x) {
-        BSONObjIterator it(x);
-        while ( it.moreWithEOO() ) {
-            BSONElement e = it.next();
-            if ( e.eoo() ) break;
-            append(e);
-        }
-        return *this;
-    }
-
-    inline bool BSONObj::isValid(){
-        return objsize() > 0 && objsize() <= 1024 * 1024 * 8;
-    }
-
-    inline bool BSONObj::getObjectID(BSONElement& e) const {
-        BSONElement f = getField("_id");
-        if( !f.eoo() ) {
-            e = f;
-            return true;
-        }
-        return false;
-    }
-
-    inline BSONObjBuilderValueStream::BSONObjBuilderValueStream( BSONObjBuilder * builder ) {
-        _fieldName = 0;
-        _builder = builder;
-    }
-
-    template<class T>
-    inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( T value ) {
-        _builder->append(_fieldName, value);
-        _fieldName = 0;
-        return *_builder;
-    }
-
-    inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const BSONElement& e ) {
-        _builder->appendAs( e , _fieldName );
-        _fieldName = 0;
-        return *_builder;
-    }
-
-    inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(DateNowLabeler& id){
-        _builder->appendDate(_fieldName, jsTime());
-        _fieldName = 0;
-        return *_builder;
-    }
-
-    inline Labeler BSONObjBuilderValueStream::operator<<( const Labeler::Label &l ) {
-        return Labeler( l, this );
-    }
-
-    inline void BSONObjBuilderValueStream::endField( const char *nextFieldName ) {
-        if ( _fieldName && haveSubobj() ) {
-            _builder->append( _fieldName, subobj()->done() );
-        }
-        _subobj.reset();
-        _fieldName = nextFieldName;
-    }
-
-    inline BSONObjBuilder *BSONObjBuilderValueStream::subobj() {
-        if ( !haveSubobj() )
-            _subobj.reset( new BSONObjBuilder() );
-        return _subobj.get();
-    }
-
-    template<class T> inline
-    BSONObjBuilder& Labeler::operator<<( T value ) {
-        s_->subobj()->append( l_.l_, value );
-        return *s_->_builder;
-    }
-
-    inline
-    BSONObjBuilder& Labeler::operator<<( const BSONElement& e ) {
-        s_->subobj()->appendAs( e, l_.l_ );
-        return *s_->_builder;
-    }
-
-    // {a: {b:1}} -> {a.b:1}
-    void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base="");
-    inline BSONObj nested2dotted(const BSONObj& obj){
-        BSONObjBuilder b;
-        nested2dotted(b, obj);
-        return b.obj();
-    }
-
-    // {a.b:1} -> {a: {b:1}}
-    void dotted2nested(BSONObjBuilder& b, const BSONObj& obj);
-    inline BSONObj dotted2nested(const BSONObj& obj){
-        BSONObjBuilder b;
-        dotted2nested(b, obj);
-        return b.obj();
-    }
-
-    /* WARNING: nested/dotted conversions are not 100% reversible
-     * nested2dotted(dotted2nested({a.b: {c:1}})) -> {a.b.c: 1}
-     * also, dotted2nested ignores order
-     */
-
-    typedef map<string, BSONElement> BSONMap;
-    inline BSONMap bson2map(const BSONObj& obj){
-        BSONMap m;
-        BSONObjIterator it(obj);
-        while (it.more()){
-            BSONElement e = it.next();
-            m[e.fieldName()] =
e;
-        }
-        return m;
-    }
-
-    struct BSONElementFieldNameCmp {
-        bool operator()( const BSONElement &l, const BSONElement &r ) const {
-            return strcmp( l.fieldName() , r.fieldName() ) <= 0;
-        }
-    };
-
-
-    typedef set<BSONElement, BSONElementFieldNameCmp> BSONSortedElements;
-    inline BSONSortedElements bson2set( const BSONObj& obj ){
-        BSONSortedElements s;
-        BSONObjIterator it(obj);
-        while ( it.more() )
-            s.insert( it.next() );
-        return s;
-    }
-
-    class BSONObjIteratorSorted {
-    public:
-        BSONObjIteratorSorted( const BSONObj& o );
-
-        ~BSONObjIteratorSorted(){
-            assert( _fields );
-            delete[] _fields;
-            _fields = 0;
-        }
-
-        bool more(){
-            return _cur < _nfields;
-        }
-
-        BSONElement next(){
-            assert( _fields );
-            if ( _cur < _nfields )
-                return BSONElement( _fields[_cur++] );
-            return BSONElement();
-        }
-
-    private:
-        const char ** _fields;
-        int _nfields;
-        int _cur;
-    };
-
-} // namespace mongo
+#include "../bson/bson_db.h"
diff -Nru mongodb-1.4.4/db/jsobjmanipulator.h mongodb-1.6.3/db/jsobjmanipulator.h
--- mongodb-1.4.4/db/jsobjmanipulator.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/jsobjmanipulator.h	2010-09-24 10:02:42.000000000 -0700
@@ -40,9 +40,11 @@
         void setNumber(double d) {
             if ( _element.type() == NumberDouble ) *reinterpret_cast< double * >( value() ) = d;
             else if ( _element.type() == NumberInt ) *reinterpret_cast< int * >( value() ) = (int) d;
+            else assert(0);
         }
         void setLong(long long n) {
-            if( _element.type() == NumberLong ) *reinterpret_cast< long long * >( value() ) = n;
+            assert( _element.type() == NumberLong );
+            *reinterpret_cast< long long * >( value() ) = n;
         }
         void setInt(int n) {
             assert( _element.type() == NumberInt );
diff -Nru mongodb-1.4.4/db/json.cpp mongodb-1.6.3/db/json.cpp
--- mongodb-1.4.4/db/json.cpp	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/json.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -16,17 +16,44 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
-#include "stdafx.h"
+#include "pch.h"
+
+#define BOOST_SPIRIT_THREADSAFE
+#if BOOST_VERSION >= 103800
+#define BOOST_SPIRIT_USE_OLD_NAMESPACE
+#include <boost/spirit/include/classic_core.hpp>
+#include <boost/spirit/include/classic_loops.hpp>
+#include <boost/spirit/include/classic_lists.hpp>
+#else
+#include <boost/spirit/core.hpp>
+#include <boost/spirit/utility/loops.hpp>
+#include <boost/spirit/utility/lists.hpp>
+#endif
+#undef assert
+#define assert MONGO_assert
+
 #include "json.h"
-#include "../util/builder.h"
+#include "../bson/util/builder.h"
 #include "../util/base64.h"
 #include "../util/hex.h"
+
 using namespace boost::spirit;

 namespace mongo {

-    struct ObjectBuilder {
+    struct ObjectBuilder : boost::noncopyable {
+        ~ObjectBuilder(){
+            unsigned i = builders.size();
+            if ( i ){
+                i--;
+                for ( ; i>=1; i-- ){
+                    if ( builders[i] ){
+                        builders[i]->done();
+                    }
+                }
+            }
+        }
         BSONObjBuilder *back() {
             return builders.back().get();
         }
@@ -426,20 +453,20 @@
         array = ch_p( '[' )[ arrayStart( self.b ) ] >> !elements >> ']';
         elements = list_p(value, ch_p(',')[arrayNext( self.b )]);
         value =
-            oid[ oidEnd( self.b ) ] |
-            dbref[ dbrefEnd( self.b ) ] |
-            bindata[ binDataEnd( self.b ) ] |
-            date[ dateEnd( self.b ) ] |
-            regex[ regexEnd( self.b ) ] |
             str[ stringEnd( self.b ) ] |
-            singleQuoteStr[ stringEnd( self.b ) ] |
             number |
             integer |
-            object[ subobjectEnd( self.b ) ] |
             array[ arrayEnd( self.b ) ] |
             lexeme_d[ str_p( "true" ) ][ trueValue( self.b ) ] |
             lexeme_d[ str_p( "false" ) ][ falseValue( self.b ) ] |
-            lexeme_d[ str_p( "null" ) ][ nullValue( self.b ) ];
+            lexeme_d[ str_p( "null" ) ][ nullValue( self.b ) ] |
+            singleQuoteStr[ stringEnd( self.b ) ] |
+            date[ dateEnd( self.b ) ] |
+            oid[ oidEnd( self.b ) ] |
+            bindata[ binDataEnd( self.b ) ] |
+            dbref[ dbrefEnd( self.b ) ] |
+            regex[ regexEnd( self.b ) ] |
+            object[ subobjectEnd( self.b ) ] ;
         // NOTE lexeme_d and rules don't mix well, so we have this mess.
         // NOTE We use range_p rather than cntrl_p, because the latter is locale dependent.
         str = lexeme_d[ ch_p( '"' )[ chClear( self.b ) ] >>
@@ -530,21 +557,25 @@
         ObjectBuilder &b;
     };

-    BSONObj fromjson( const char *str ) {
-        if ( ! strlen(str) )
+    BSONObj fromjson( const char *str , int* len) {
+        if ( str[0] == '\0' ){
+            if (len) *len = 0;
             return BSONObj();
+        }
+
         ObjectBuilder b;
         JsonGrammar parser( b );
         parse_info<> result = parse( str, parser, space_p );
-        if ( !result.full ) {
-            int len = strlen( result.stop );
-            if ( len > 10 )
-                len = 10;
-            stringstream ss;
-            ss << "Failure parsing JSON string near: " << string( result.stop, len );
-            massert( 10340 , ss.str(), false );
-        }
-        return b.pop();
+        if (len) {
+            *len = result.stop - str;
+        } else if ( !result.full ) {
+            int limit = strnlen(result.stop , 10);
+            if (limit == -1)
+                limit = 10;
+            msgasserted(10340, "Failure parsing JSON string near: " + string( result.stop, limit ));
+        }
+        BSONObj ret = b.pop();
+        assert( b.empty() );
+        return ret;
     }

     BSONObj fromjson( const string &str ) {
diff -Nru mongodb-1.4.4/db/json.h mongodb-1.6.3/db/json.h
--- mongodb-1.4.4/db/json.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/json.h	2010-09-24 10:02:42.000000000 -0700
@@ -18,7 +18,7 @@

 #pragma once

-#include "../stdafx.h"
+#include "../pch.h"
 #include "jsobj.h"

 namespace mongo {
@@ -35,6 +35,7 @@
      */
     BSONObj fromjson(const string &str);

-    BSONObj fromjson(const char *str);
+    /** len will be size of JSON object in text chars.
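        A caller sketch (illustrative): given const char *s = "{ a: 1 } trailing",
          int len; BSONObj o = fromjson( s , &len );
        parses the leading object and reports the characters consumed in len instead
        of asserting on the trailing text.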
*/ + BSONObj fromjson(const char *str, int* len=NULL); } // namespace mongo diff -Nru mongodb-1.4.4/db/lasterror.cpp mongodb-1.6.3/db/lasterror.cpp --- mongodb-1.4.4/db/lasterror.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/lasterror.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "../util/unittest.h" #include "../util/message.h" @@ -28,8 +28,24 @@ LastError LastError::noError; LastErrorHolder lastError; - mongo::mutex LastErrorHolder::_idsmutex; + mongo::mutex LastErrorHolder::_idsmutex("LastErrorHolder"); + bool isShell = false; + void raiseError(int code , const char *msg) { + LastError *le = lastError.get(); + if ( le == 0 ) { + /* might be intentional (non-user thread) */ + DEV { + static unsigned n; + if( ++n < 4 && !isShell ) log() << "dev: lastError==0 won't report:" << msg << endl; + } + } else if ( le->disabled ) { + log() << "lastError disabled, can't report: " << code << ":" << msg << endl; + } else { + le->raiseError(code, msg); + } + } + void LastError::appendSelf( BSONObjBuilder &b ) { if ( !valid ) { b.appendNull( "err" ); @@ -44,7 +60,11 @@ b.append( "code" , code ); if ( updatedExisting != NotUpdate ) b.appendBool( "updatedExisting", updatedExisting == True ); - b.append( "n", nObjects ); + if ( upsertedId.isSet() ) + b.append( "upserted" , upsertedId ); + if ( writebackId.isSet() ) + b.append( "writeback" , writebackId ); + b.appendNumber( "n", nObjects ); } void LastErrorHolder::setID( int id ){ @@ -119,6 +139,13 @@ remove( id ); } + + /** ok to call more than once. */ + void LastErrorHolder::initThread() { + if( _tl.get() ) return; + assert( _id.get() == 0 ); + _tl.reset( new LastError() ); + } void LastErrorHolder::reset( LastError * le ){ int id = _id.get(); @@ -132,10 +159,10 @@ status.time = time(0); status.lerr = le; } - + void prepareErrForNewRequest( Message &m, LastError * err ) { // a killCursors message shouldn't affect last error - if ( m.data->operation() == dbKillCursors ) { + if ( m.operation() == dbKillCursors ) { err->disabled = true; } else { err->disabled = false; @@ -143,19 +170,22 @@ } } - void LastErrorHolder::startRequest( Message& m ) { - int id = m.data->id & 0xFFFF0000; - setID( id ); + LastError * LastErrorHolder::startRequest( Message& m , int clientId ) { + assert( clientId ); + setID( clientId ); + LastError * le = _get( true ); prepareErrForNewRequest( m, le ); + return le; } void LastErrorHolder::startRequest( Message& m , LastError * connectionOwned ) { - if ( !connectionOwned->overridenById ) { - prepareErrForNewRequest( m, connectionOwned ); - return; - } - startRequest(m); + prepareErrForNewRequest( m, connectionOwned ); + } + + void LastErrorHolder::disconnect( int clientId ){ + if ( clientId ) + remove(clientId); } struct LastErrorHolderTest : public UnitTest { diff -Nru mongodb-1.4.4/db/lasterror.h mongodb-1.6.3/db/lasterror.h --- mongodb-1.4.4/db/lasterror.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/lasterror.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,9 +17,7 @@ #pragma once -#include -#undef assert -#define assert xassert +#include "../bson/oid.h" namespace mongo { class BSONObjBuilder; @@ -29,28 +27,34 @@ int code; string msg; enum UpdatedExistingType { NotUpdate, True, False } updatedExisting; - /* todo: nObjects should be 64 bit */ + OID upsertedId; + OID writebackId; long long nObjects; int nPrev; bool valid; - bool overridenById; bool disabled; + void writeback( OID& oid ){ + reset( true ); + 
writebackId = oid; + } void raiseError(int _code , const char *_msg) { reset( true ); code = _code; msg = _msg; } - void recordUpdate( bool _updatedExisting, long long nChanged ) { + void recordUpdate( bool _updateObjects , long long _nObjects , OID _upsertedId ){ reset( true ); - nObjects = nChanged; - updatedExisting = _updatedExisting ? True : False; + nObjects = _nObjects; + updatedExisting = _updateObjects ? True : False; + if ( _upsertedId.isSet() ) + upsertedId = _upsertedId; + } void recordDelete( long long nDeleted ) { reset( true ); nObjects = nDeleted; } LastError() { - overridenById = false; reset(); } void reset( bool _valid = false ) { @@ -61,8 +65,31 @@ nPrev = 1; valid = _valid; disabled = false; + upsertedId.clear(); + writebackId.clear(); } void appendSelf( BSONObjBuilder &b ); + + struct Disabled : boost::noncopyable { + Disabled( LastError * le ){ + _le = le; + if ( _le ){ + _prev = _le->disabled; + _le->disabled = true; + } else { + _prev = false; + } + } + + ~Disabled(){ + if ( _le ) + _le->disabled = _prev; + } + + LastError * _le; + bool _prev; + }; + static LastError noError; }; @@ -71,11 +98,22 @@ LastErrorHolder() : _id( 0 ) {} LastError * get( bool create = false ); + LastError * getSafe(){ + LastError * le = get(false); + if ( ! le ){ + log( LL_ERROR ) << " no LastError! id: " << getID() << endl; + assert( le ); + } + return le; + } LastError * _get( bool create = false ); // may return a disabled LastError void reset( LastError * le ); - + + /** ok to call more than once. */ + void initThread(); + /** * id of 0 means should use thread local management */ @@ -87,8 +125,10 @@ /** when db receives a message/request, call this */ void startRequest( Message& m , LastError * connectionOwned ); - void startRequest( Message& m ); + LastError * startRequest( Message& m , int clientId ); + void disconnect( int clientId ); + // used to disable lastError reporting while processing a killCursors message // disable causes get() to return 0. LastError *disableForCommand(); // only call once per command invocation! @@ -103,28 +143,7 @@ static mongo::mutex _idsmutex; map _ids; } lastError; - - inline void raiseError(int code , const char *msg) { - LastError *le = lastError.get(); - if ( le == 0 ) { - DEV log() << "warning: lastError==0 can't report:" << msg << '\n'; - } else if ( le->disabled ) { - log() << "lastError disabled, can't report: " << msg << endl; - } else { - le->raiseError(code, msg); - } - } - - inline void recordUpdate( bool updatedExisting, int nChanged ) { - LastError *le = lastError.get(); - if ( le ) - le->recordUpdate( updatedExisting, nChanged ); - } - - inline void recordDelete( int nDeleted ) { - LastError *le = lastError.get(); - if ( le ) - le->recordDelete( nDeleted ); - } + + void raiseError(int code , const char *msg); } // namespace mongo diff -Nru mongodb-1.4.4/db/matcher_covered.cpp mongodb-1.6.3/db/matcher_covered.cpp --- mongodb-1.4.4/db/matcher_covered.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/matcher_covered.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,80 @@ +// matcher_covered.cpp + +/* Matcher is our boolean expression evaluator for "where" clauses */ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "matcher.h" +#include "../util/goodies.h" +#include "../util/unittest.h" +#include "diskloc.h" +#include "../scripting/engine.h" +#include "db.h" +#include "client.h" + +#include "pdfile.h" + +namespace mongo { + + CoveredIndexMatcher::CoveredIndexMatcher( const BSONObj &jsobj, const BSONObj &indexKeyPattern, bool alwaysUseRecord) : + _docMatcher( new Matcher( jsobj ) ), + _keyMatcher( *_docMatcher, indexKeyPattern ) + { + init( alwaysUseRecord ); + } + + CoveredIndexMatcher::CoveredIndexMatcher( const shared_ptr< Matcher > &docMatcher, const BSONObj &indexKeyPattern , bool alwaysUseRecord ) : + _docMatcher( docMatcher ), + _keyMatcher( *_docMatcher, indexKeyPattern ) + { + init( alwaysUseRecord ); + } + + void CoveredIndexMatcher::init( bool alwaysUseRecord ) { + _needRecord = + alwaysUseRecord || + ! ( _docMatcher->keyMatch() && + _keyMatcher.sameCriteriaCount( *_docMatcher ) && + ! _keyMatcher.hasType( BSONObj::opEXISTS ) ); + ; + } + + bool CoveredIndexMatcher::matchesCurrent( Cursor * cursor , MatchDetails * details ){ + return matches( cursor->currKey() , cursor->currLoc() , details ); + } + + bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details ) { + if ( details ) + details->reset(); + + if ( !_keyMatcher.matches(key, details ) ){ + return false; + } + + if ( ! _needRecord ){ + return true; + } + + if ( details ) + details->loadedObject = true; + + return _docMatcher->matches(recLoc.rec() , details ); + } + + +} diff -Nru mongodb-1.4.4/db/matcher.cpp mongodb-1.6.3/db/matcher.cpp --- mongodb-1.4.4/db/matcher.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/matcher.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "matcher.h" #include "../util/goodies.h" #include "../util/unittest.h" @@ -50,7 +50,9 @@ #define DEBUGMATCHER(x) namespace mongo { - + + extern BSONObj staticNull; + class Where { public: Where() { @@ -141,39 +143,6 @@ } - CoveredIndexMatcher::CoveredIndexMatcher(const BSONObj &jsobj, const BSONObj &indexKeyPattern) : - _keyMatcher(jsobj.filterFieldsUndotted(indexKeyPattern, true), - indexKeyPattern), - _docMatcher(jsobj) - { - _needRecord = ! ( - _docMatcher.keyMatch() && - _keyMatcher.jsobj.nFields() == _docMatcher.jsobj.nFields() && - ! _keyMatcher.hasType( BSONObj::opEXISTS ) - ); - - } - - bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details ) { - if ( details ) - details->reset(); - - if ( _keyMatcher.keyMatch() ) { - if ( !_keyMatcher.matches(key, details ) ){ - return false; - } - } - - if ( ! 
_needRecord ){ - return true; - } - - if ( details ) - details->loadedObject = true; - - return _docMatcher.matches(recLoc.rec() , details ); - } - void Matcher::addRegex(const char *fieldName, const char *regex, const char *flags, bool isNot){ @@ -230,9 +199,11 @@ case BSONObj::opALL: all = true; case BSONObj::opIN: + uassert( 13276 , "$in needs an array" , fe.isABSONObj() ); basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); break; case BSONObj::NIN: + uassert( 13277 , "$nin needs an array" , fe.isABSONObj() ); haveNeg = true; basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); break; @@ -279,6 +250,7 @@ } case BSONObj::opNEAR: case BSONObj::opWITHIN: + case BSONObj::opMAX_DISTANCE: break; default: uassert( 10069 , (string)"BUG - can't operator for: " + fn , 0 ); @@ -286,19 +258,50 @@ return true; } + void Matcher::parseOr( const BSONElement &e, bool subMatcher, list< shared_ptr< Matcher > > &matchers ) { + uassert( 13090, "nested $or/$nor not allowed", !subMatcher ); + uassert( 13086, "$or/$nor must be a nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 ); + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + BSONElement f = j.next(); + uassert( 13087, "$or/$nor match element must be an object", f.type() == Object ); + // until SERVER-109 this is never a covered index match, so don't constrain index key for $or matchers + matchers.push_back( shared_ptr< Matcher >( new Matcher( f.embeddedObject(), true ) ) ); + } + } + + bool Matcher::parseOrNor( const BSONElement &e, bool subMatcher ) { + const char *ef = e.fieldName(); + if ( ef[ 0 ] != '$' ) + return false; + if ( ef[ 1 ] == 'o' && ef[ 2 ] == 'r' && ef[ 3 ] == 0 ) { + parseOr( e, subMatcher, _orMatchers ); + } else if ( ef[ 1 ] == 'n' && ef[ 2 ] == 'o' && ef[ 3 ] == 'r' && ef[ 4 ] == 0 ) { + parseOr( e, subMatcher, _norMatchers ); + } else { + return false; + } + return true; + } + /* _jsobj - the query pattern */ - Matcher::Matcher(const BSONObj &_jsobj, const BSONObj &constrainIndexKey) : + Matcher::Matcher(const BSONObj &_jsobj, bool subMatcher) : where(0), jsobj(_jsobj), haveSize(), all(), hasArray(0), haveNeg(), _atomic(false), nRegex(0) { BSONObjIterator i(jsobj); while ( i.more() ) { BSONElement e = i.next(); + + if ( parseOrNor( e, subMatcher ) ) { + continue; + } if ( ( e.type() == CodeWScope || e.type() == Code || e.type() == String ) && strcmp(e.fieldName(), "$where")==0 ) { // $where: function()... - uassert( 10066 , "$where occurs twice?", where == 0 ); - uassert( 10067 , "$where query, but no script engine", globalScriptEngine ); + uassert( 10066 , "$where occurs twice?", where == 0 ); + uassert( 10067 , "$where query, but no script engine", globalScriptEngine ); + massert( 13089 , "no current client needed for $where" , haveClient() ); where = new Where(); where->scope = globalScriptEngine->getPooledScope( cc().ns() ); where->scope->localConnect( cc().database()->name.c_str() ); @@ -348,7 +351,7 @@ BSONObjIterator k( fe.embeddedObject() ); uassert( 13030, "$not cannot be empty", k.more() ); while( k.more() ) { - addOp( e, k.next(), true, regex, flags ); + addOp( e, k.next(), true, regex, flags ); } break; } @@ -388,8 +391,35 @@ // normal, simple case e.g. 
{ a : "foo" } addBasic(e, BSONObj::Equality, false); } - - constrainIndexKey_ = constrainIndexKey; + } + + Matcher::Matcher( const Matcher &other, const BSONObj &key ) : + where(0), constrainIndexKey_( key ), haveSize(), all(), hasArray(0), haveNeg(), _atomic(false), nRegex(0) { + // do not include fields which would make keyMatch() false + for( vector< ElementMatcher >::const_iterator i = other.basics.begin(); i != other.basics.end(); ++i ) { + if ( key.hasField( i->toMatch.fieldName() ) ) { + switch( i->compareOp ) { + case BSONObj::opSIZE: + case BSONObj::opALL: + case BSONObj::NE: + case BSONObj::NIN: + break; + default: { + if ( !i->isNot && i->toMatch.type() != Array ) { + basics.push_back( *i ); + } + } + } + } + } + for( int i = 0; i < other.nRegex; ++i ) { + if ( !other.regexs[ i ].isNot && key.hasField( other.regexs[ i ].fieldName ) ) { + regexs[ nRegex++ ] = other.regexs[ i ]; + } + } + for( list< shared_ptr< Matcher > >::const_iterator i = other._orMatchers.begin(); i != other._orMatchers.end(); ++i ) { + _orMatchers.push_back( shared_ptr< Matcher >( new Matcher( **i, key ) ) ); + } } inline bool regexMatches(const RegexMatcher& rm, const BSONElement& e) { @@ -711,7 +741,7 @@ return false; if ( cmp == 0 ) { /* missing is ok iff we were looking for null */ - if ( m.type() == jstNULL || m.type() == Undefined ) { + if ( m.type() == jstNULL || m.type() == Undefined || ( bm.compareOp == BSONObj::opIN && bm.myset->count( staticNull.firstElement() ) > 0 ) ) { if ( ( bm.compareOp == BSONObj::NE ) ^ bm.isNot ) { return false; } @@ -741,6 +771,42 @@ return false; } + if ( _orMatchers.size() > 0 ) { + bool match = false; + for( list< shared_ptr< Matcher > >::const_iterator i = _orMatchers.begin(); + i != _orMatchers.end(); ++i ) { + // SERVER-205 don't submit details - we don't want to track field + // matched within $or, and at this point we've already loaded the + // whole document + if ( (*i)->matches( jsobj ) ) { + match = true; + break; + } + } + if ( !match ) { + return false; + } + } + + if ( _norMatchers.size() > 0 ) { + for( list< shared_ptr< Matcher > >::const_iterator i = _norMatchers.begin(); + i != _norMatchers.end(); ++i ) { + // SERVER-205 don't submit details - we don't want to track field + // matched within $nor, and at this point we've already loaded the + // whole document + if ( (*i)->matches( jsobj ) ) { + return false; + } + } + } + + for( vector< shared_ptr< FieldRangeVector > >::const_iterator i = _orConstraints.begin(); + i != _orConstraints.end(); ++i ) { + if ( (*i)->matches( jsobj ) ) { + return false; + } + } + if ( where ) { if ( where->func == 0 ) { uassert( 10070 , "$where compile error", false); @@ -769,7 +835,7 @@ return where->scope->getBoolean( "return" ) != 0; } - + return true; } @@ -780,6 +846,72 @@ return false; } + bool Matcher::sameCriteriaCount( const Matcher &other ) const { + if ( !( basics.size() == other.basics.size() && nRegex == other.nRegex && !where == !other.where ) ) { + return false; + } + if ( _norMatchers.size() != other._norMatchers.size() ) { + return false; + } + if ( _orMatchers.size() != other._orMatchers.size() ) { + return false; + } + if ( _orConstraints.size() != other._orConstraints.size() ) { + return false; + } + { + list< shared_ptr< Matcher > >::const_iterator i = _norMatchers.begin(); + list< shared_ptr< Matcher > >::const_iterator j = other._norMatchers.begin(); + while( i != _norMatchers.end() ) { + if ( !(*i)->sameCriteriaCount( **j ) ) { + return false; + } + ++i; ++j; + } + } + { + list< shared_ptr< Matcher > 
>::const_iterator i = _orMatchers.begin(); + list< shared_ptr< Matcher > >::const_iterator j = other._orMatchers.begin(); + while( i != _orMatchers.end() ) { + if ( !(*i)->sameCriteriaCount( **j ) ) { + return false; + } + ++i; ++j; + } + } + return true; + } + + + /*- just for testing -- */ +#pragma pack(1) + struct JSObj1 { + JSObj1() { + totsize=sizeof(JSObj1); + n = NumberDouble; + strcpy_s(nname, 5, "abcd"); + N = 3.1; + s = String; + strcpy_s(sname, 7, "abcdef"); + slen = 10; + strcpy_s(sval, 10, "123456789"); + eoo = EOO; + } + unsigned totsize; + + char n; + char nname[5]; + double N; + + char s; + char sname[7]; + unsigned slen; + char sval[10]; + + char eoo; + }; +#pragma pack() + struct JSObj1 js1; #pragma pack(1) diff -Nru mongodb-1.4.4/db/matcher.h mongodb-1.6.3/db/matcher.h --- mongodb-1.4.4/db/matcher.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/matcher.h 2010-09-24 10:02:42.000000000 -0700 @@ -24,9 +24,11 @@ #include <pcrecpp.h> namespace mongo { - + + class Cursor; class CoveredIndexMatcher; class Matcher; + class FieldRangeVector; class RegexMatcher { public: @@ -133,20 +135,40 @@ return op <= BSONObj::LTE ? -1 : 1; } - // Only specify constrainIndexKey if matches() will be called with - // index keys having empty string field names. - Matcher(const BSONObj &pattern, const BSONObj &constrainIndexKey = BSONObj()); + Matcher(const BSONObj &pattern, bool subMatcher = false); ~Matcher(); bool matches(const BSONObj& j, MatchDetails * details = 0 ); + // fast rough check to see if we must load the real doc - we also + // compare field counts against covered index matcher; for $or clauses + // we just compare field counts bool keyMatch() const { return !all && !haveSize && !hasArray && !haveNeg; } bool atomic() const { return _atomic; } - + bool hasType( BSONObj::MatchType type ) const; + + string toString() const { + return jsobj.toString(); + } + + void addOrConstraint( const shared_ptr< FieldRangeVector > &frv ) { + _orConstraints.push_back( frv ); + } + + void popOrClause() { + _orMatchers.pop_front(); + } + + bool sameCriteriaCount( const Matcher &other ) const; + private: + // Only specify constrainIndexKey if matches() will be called with + // index keys having empty string field names. + Matcher( const Matcher &other, const BSONObj &constrainIndexKey ); + void addBasic(const BSONElement &e, int c, bool isNot) { // TODO May want to selectively ignore these element types based on op type. if ( e.type() == MinKey || e.type() == MaxKey ) @@ -159,6 +181,9 @@ int valuesMatch(const BSONElement& l, const BSONElement& r, int op, const ElementMatcher& bm); + bool parseOrNor( const BSONElement &e, bool subMatcher ); + void parseOr( const BSONElement &e, bool subMatcher, list< shared_ptr< Matcher > > &matchers ); + Where *where; // set if query uses $where BSONObj jsobj; // the query pattern. e.g., { name: "joe" } BSONObj constrainIndexKey_; @@ -180,6 +205,9 @@ // so we delete the mem when we're done: vector< shared_ptr< BSONObjBuilder > > _builders; + list< shared_ptr< Matcher > > _orMatchers; + list< shared_ptr< Matcher > > _norMatchers; + vector< shared_ptr< FieldRangeVector > > _orConstraints; friend class CoveredIndexMatcher; };
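
The keyMatch()/sameCriteriaCount() checks above and the CoveredIndexMatcher declared just below implement a two-phase match: the query first runs against the index key alone, and the full record is fetched only when init() decided up front that the key cannot answer everything. A minimal standalone sketch of that control flow, using stand-in types rather than the real mongod classes:

    #include <iostream>
    #include <string>

    // Stand-in types; illustration only, not the real Matcher/Record classes.
    struct IndexKey { int a; };
    struct Record   { int a; std::string s; };

    struct CoveredMatcherSketch {
        bool needRecord; // computed once up front, as in CoveredIndexMatcher::init()

        bool keyMatch(const IndexKey& k) const { return k.a > 0; }
        bool docMatch(const Record& r)   const { return r.a > 0 && !r.s.empty(); }

        bool matches(const IndexKey& k, const Record& r, bool* loadedObject) const {
            if (!keyMatch(k))
                return false;     // phase 1: cheap rejection on the key alone
            if (!needRecord)
                return true;      // covered: the key answered the whole query
            *loadedObject = true; // phase 2: the document had to be loaded
            return docMatch(r);
        }
    };

    int main() {
        bool loaded = false;
        CoveredMatcherSketch m = { false /*needRecord*/ };
        std::cout << m.matches(IndexKey{1}, Record{1, ""}, &loaded)
                  << " loaded=" << loaded << '\n'; // 1 loaded=0: record never read
    }

The payoff is the loaded=0 case: a fully covered query never touches the data file at all.

@@ -187,15 +215,30 @@ // If match succeeds on index key, then attempt to match full document.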
class CoveredIndexMatcher : boost::noncopyable { public: - CoveredIndexMatcher(const BSONObj &pattern, const BSONObj &indexKeyPattern); - bool matches(const BSONObj &o){ return _docMatcher.matches( o ); } + CoveredIndexMatcher(const BSONObj &pattern, const BSONObj &indexKeyPattern , bool alwaysUseRecord=false ); + bool matches(const BSONObj &o){ return _docMatcher->matches( o ); } bool matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details = 0 ); + bool matchesCurrent( Cursor * cursor , MatchDetails * details = 0 ); bool needRecord(){ return _needRecord; } + + Matcher& docMatcher() { return *_docMatcher; } - Matcher& docMatcher() { return _docMatcher; } + // once this is called, shouldn't use this matcher for matching any more + void advanceOrClause( const shared_ptr< FieldRangeVector > &frv ) { + _docMatcher->addOrConstraint( frv ); + // TODO this is not an optimal optimization, since we could skip an entire + // or clause (if a match is impossible) between calls to advanceOrClause() + _docMatcher->popOrClause(); + } + + CoveredIndexMatcher *nextClauseMatcher( const BSONObj &indexKeyPattern, bool alwaysUseRecord=false ) { + return new CoveredIndexMatcher( _docMatcher, indexKeyPattern, alwaysUseRecord ); + } private: + CoveredIndexMatcher(const shared_ptr< Matcher > &docMatcher, const BSONObj &indexKeyPattern , bool alwaysUseRecord=false ); + void init( bool alwaysUseRecord ); + shared_ptr< Matcher > _docMatcher; Matcher _keyMatcher; - Matcher _docMatcher; bool _needRecord; }; diff -Nru mongodb-1.4.4/db/module.cpp mongodb-1.6.3/db/module.cpp --- mongodb-1.4.4/db/module.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/module.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "module.h" namespace mongo { diff -Nru mongodb-1.4.4/db/module.h mongodb-1.6.3/db/module.h --- mongodb-1.4.4/db/module.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/module.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include <list> #include <string> diff -Nru mongodb-1.4.4/db/modules/mms.cpp mongodb-1.6.3/db/modules/mms.cpp --- mongodb-1.4.4/db/modules/mms.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/modules/mms.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// mms.cpp +// @file mms.cpp /* * Copyright (C) 2010 10gen Inc. * @@ -16,7 +16,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "../db.h" #include "../instance.h" #include "../module.h" Binary files /tmp/eJqezWvDkt/mongodb-1.4.4/db/mongo.ico and /tmp/oJOREzsx7Y/mongodb-1.6.3/db/mongo.ico differ
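
The advanceOrClause()/nextClauseMatcher() API above is driven one $or clause at a time: each clause gets its own CoveredIndexMatcher over a possibly different index, and the range recorded for a finished clause keeps later clauses from returning documents an earlier clause already produced. A toy sketch of that driver loop; ClauseSketch is an illustrative stand-in for a matcher plus its covered range, not the real FieldRangeVector machinery:

    #include <iostream>
    #include <list>
    #include <memory>
    #include <vector>

    struct ClauseSketch {
        int lo, hi;                    // pretend clause: lo <= x <= hi
        bool matches(int x) const { return x >= lo && x <= hi; }
    };

    int main() {
        std::list<std::shared_ptr<ClauseSketch>> orClauses;
        orClauses.push_back(std::make_shared<ClauseSketch>(ClauseSketch{0, 5}));
        orClauses.push_back(std::make_shared<ClauseSketch>(ClauseSketch{3, 9}));
        std::vector<ClauseSketch> done;        // plays the _orConstraints role

        std::vector<int> docs = {1, 4, 8};
        while (!orClauses.empty()) {
            std::shared_ptr<ClauseSketch> clause = orClauses.front();
            for (int d : docs) {
                bool dup = false;              // skip docs a prior clause returned
                for (const ClauseSketch& c : done) dup = dup || c.matches(d);
                if (!dup && clause->matches(d))
                    std::cout << "clause [" << clause->lo << "," << clause->hi
                              << "] returns " << d << "\n";
            }
            done.push_back(*clause);           // like addOrConstraint( frv )
            orClauses.pop_front();             // like popOrClause()
        }
    }

Document 4 is matched by both clauses but returned only once, which is exactly what the accumulated constraints guarantee.

diff -Nru mongodb-1.4.4/db/mr.cpp mongodb-1.6.3/db/mr.cpp --- mongodb-1.4.4/db/mr.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/mr.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.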
*/ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "instance.h" #include "commands.h" @@ -23,6 +23,9 @@ #include "../client/dbclient.h" #include "../client/connpool.h" #include "../client/parallel.h" +#include "queryoptimizer.h" +#include "matcher.h" +#include "clientcursor.h" namespace mongo { @@ -49,7 +52,7 @@ BSONObj key; BSONObjBuilder reduceArgs( sizeEstimate ); - BSONArrayBuilder * valueBuilder = 0; + boost::scoped_ptr<BSONArrayBuilder> valueBuilder; int sizeSoFar = 0; unsigned n = 0; @@ -59,8 +62,8 @@ if ( n == 0 ){ reduceArgs.append( keyE ); key = keyE.wrap(); - valueBuilder = new BSONArrayBuilder( reduceArgs.subarrayStart( "values" ) ); sizeSoFar = 5 + keyE.size(); + valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "values" ) )); } BSONElement ee = j.next(); @@ -77,7 +80,6 @@ } assert(valueBuilder); valueBuilder->done(); - delete valueBuilder; BSONObj args = reduceArgs.obj(); s->invokeSafe( reduce , args ); @@ -152,10 +154,10 @@ } { // code - mapCode = cmdObj["map"].ascode(); - reduceCode = cmdObj["reduce"].ascode(); + mapCode = cmdObj["map"]._asCode(); + reduceCode = cmdObj["reduce"]._asCode(); if ( cmdObj["finalize"].type() ){ - finalizeCode = cmdObj["finalize"].ascode(); + finalizeCode = cmdObj["finalize"]._asCode(); } checkCodeWScope( "map" , cmdObj ); checkCodeWScope( "reduce" , cmdObj ); @@ -174,11 +176,11 @@ { // query options if ( cmdObj["query"].type() == Object ){ filter = cmdObj["query"].embeddedObjectUserCheck(); - q = filter; } - if ( cmdObj["sort"].type() == Object ) - q.sort( cmdObj["sort"].embeddedObjectUserCheck() ); + if ( cmdObj["sort"].type() == Object ){ + sort = cmdObj["sort"].embeddedObjectUserCheck(); + } if ( cmdObj["limit"].isNumber() ) limit = cmdObj["limit"].numberLong(); @@ -220,7 +222,7 @@ // query options BSONObj filter; - Query q; + BSONObj sort; long long limit; // functions @@ -287,7 +289,7 @@ if ( setup.replicate ) theDataFileMgr.insertAndLog( setup.tempLong.c_str() , res , false ); else - theDataFileMgr.insert( setup.tempLong.c_str() , res , false ); + theDataFileMgr.insertWithObjMod( setup.tempLong.c_str() , res , false ); } @@ -303,21 +305,17 @@ class MRTL { public: - MRTL( MRState& state ) : _state( state ){ - _temp = new InMemory(); + MRTL( MRState& state ) + : _state( state ) + , _temp(new InMemory()) + { _size = 0; numEmits = 0; } - ~MRTL(){ - delete _temp; - } - void reduceInMemory(){ - - InMemory * old = _temp; - InMemory * n = new InMemory(); - _temp = n; + boost::shared_ptr<InMemory> old = _temp; + _temp.reset(new InMemory()); _size = 0; for ( InMemory::iterator i=old->begin(); i!=old->end(); i++ ){ @@ -327,6 +325,7 @@ if ( all.size() == 1 ){ // this key has low cardinality, so just write to db writelock l(_state.setup.incLong); + Client::Context ctx(_state.setup.incLong.c_str()); write( *(all.begin()) ); } else if ( all.size() > 1 ){ @@ -334,9 +333,6 @@ insert( res ); } } - - delete( old ); - } void dump(){ @@ -379,12 +375,12 @@ private: void write( BSONObj& o ){ - theDataFileMgr.insert( _state.setup.incLong.c_str() , o , true ); + theDataFileMgr.insertWithObjMod( _state.setup.incLong.c_str() , o , true ); } MRState& _state; - InMemory * _temp; + boost::shared_ptr<InMemory> _temp; long _size; public:
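
The MRTL class above is the map-side accumulator: emit() collects values per key in the in-memory map, and when the estimated size grows too large reduceInMemory() folds each key's value list down to a single reduced value before anything is written out. A compact sketch of that fold, with a summing reduce standing in for the user's JavaScript reduce function:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Toy model of MRTL: key -> emitted values, reduced in place when large.
    struct InMemoryReduceSketch {
        std::map<std::string, std::vector<long>> temp;
        size_t size = 0;                     // rough running estimate, like _size

        void emit(const std::string& k, long v) {
            temp[k].push_back(v);
            size += k.size() + sizeof v;
            if (size > 64) reduceInMemory(); // tiny threshold for the demo
        }
        void reduceInMemory() {
            for (auto& kv : temp) {
                if (kv.second.size() < 2) continue;  // nothing to fold yet
                long sum = 0;
                for (long v : kv.second) sum += v;
                kv.second.assign(1, sum);            // many values -> one
            }
            size = 0;                                // recomputed in the real code
        }
    };

    int main() {
        InMemoryReduceSketch mr;
        for (int i = 0; i < 10; ++i) mr.emit("word", 1);
        mr.reduceInMemory();
        std::cout << "word -> " << mr.temp["word"][0] << '\n';  // word -> 10
    }

This relies on reduce being associative, which is why the real engine can also re-reduce spilled singletons later.

@@ -403,20 +399,22 @@ class MapReduceCommand : public Command { public: - MapReduceCommand() : Command("mapreduce"){} - virtual bool slaveOk() { return true; } + MapReduceCommand() : Command("mapReduce", false, "mapreduce"){} + virtual bool slaveOk() const { return true; } virtual void help( stringstream &help ) const { - help << "see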
http://www.mongodb.org/display/DOCS/MapReduce"; + help << "Run a map/reduce operation on the server.\n"; + help << "Note this is used for aggregation, not querying, in MongoDB.\n"; + help << "http://www.mongodb.org/display/DOCS/MapReduce"; } - virtual LockType locktype(){ return WRITE; } // TODO, READ? - bool run(const char *dbname, BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + virtual LockType locktype() const { return NONE; } + bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ Timer t; Client::GodScope cg; Client& client = cc(); CurOp * op = client.curop(); - MRSetup mr( client.database()->name , cmd ); + MRSetup mr( dbname , cmd ); log(1) << "mr ns: " << mr.ns << endl; @@ -440,36 +438,58 @@ MRTL * mrtl = new MRTL( state ); _tlmr.reset( mrtl ); - ProgressMeter & pm = op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) ); - auto_ptr<DBClientCursor> cursor = db.query( mr.ns , mr.q ); + ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) ) ); long long mapTime = 0; - Timer mt; - while ( cursor->more() ){ - BSONObj o = cursor->next(); - - if ( mr.verbose ) mt.reset(); - - state.scope->setThis( &o ); - if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) ) - throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() ); - - if ( mr.verbose ) mapTime += mt.micros(); - - num++; - if ( num % 100 == 0 ){ - Timer t; - mrtl->checkSize(); - inReduce += t.micros(); - killCurrentOp.checkForInterrupt(); - dbtemprelease temprlease; - } - pm.hit(); + { + readlock lock( mr.ns ); + Client::Context ctx( mr.ns ); + + shared_ptr<Cursor> temp = bestGuessCursor( mr.ns.c_str(), mr.filter, mr.sort ); + auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.ns.c_str() ) ); - if ( mr.limit && num >= mr.limit ) - break; + Timer mt; + while ( cursor->ok() ){ + + if ( ! cursor->currentMatches() ){ + cursor->advance(); + continue; + } + + BSONObj o = cursor->current(); + cursor->advance(); + + if ( mr.verbose ) mt.reset(); + + state.scope->setThis( &o ); + if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) ) + throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() ); + + if ( mr.verbose ) mapTime += mt.micros(); + + num++; + if ( num % 100 == 0 ){ + ClientCursor::YieldLock yield (cursor.get()); + Timer t; + mrtl->checkSize(); + inReduce += t.micros(); + + if ( !
yield.stillOk() ){ + cursor.release(); + break; + } + + killCurrentOp.checkForInterrupt(); + } + pm.hit(); + + if ( mr.limit && num >= mr.limit ) + break; + } } pm.finished(); + killCurrentOp.checkForInterrupt(); + countsBuilder.appendNumber( "input" , num ); countsBuilder.appendNumber( "emit" , mrtl->numEmits ); if ( mrtl->numEmits ) @@ -486,36 +506,68 @@ BSONObj sortKey = BSON( "0" << 1 ); db.ensureIndex( mr.incLong , sortKey ); - BSONObj prev; - BSONList all; - - assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) ); + { + writelock lock( mr.tempLong.c_str() ); + Client::Context ctx( mr.tempLong.c_str() ); + assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) ); + } - pm = op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) ); - cursor = db.query( mr.incLong, Query().sort( sortKey ) ); - while ( cursor->more() ){ - BSONObj o = cursor->next().getOwned(); - pm.hit(); + { + readlock rl(mr.incLong.c_str()); + Client::Context ctx( mr.incLong ); + + BSONObj prev; + BSONList all; + + assert( pm == op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) ) ); - if ( o.woSortOrder( prev , sortKey ) == 0 ){ + shared_ptr<Cursor> temp = bestGuessCursor( mr.incLong.c_str() , BSONObj() , sortKey ); + auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.incLong.c_str() ) ); + + while ( cursor->ok() ){ + BSONObj o = cursor->current().getOwned(); + cursor->advance(); + + pm.hit(); + + if ( o.woSortOrder( prev , sortKey ) == 0 ){ + all.push_back( o ); + if ( pm->hits() % 1000 == 0 ){ + if ( ! cursor->yield() ){ + cursor.release(); + break; + } + killCurrentOp.checkForInterrupt(); + } + continue; + } + + ClientCursor::YieldLock yield (cursor.get()); + state.finalReduce( all ); + + all.clear(); + prev = o; all.push_back( o ); - if ( pm.hits() % 1000 == 0 ){ - dbtemprelease tl; + + if ( ! yield.stillOk() ){ + cursor.release(); + break; } - continue; + + killCurrentOp.checkForInterrupt(); } - - state.finalReduce( all ); - - all.clear(); - prev = o; - all.push_back( o ); - killCurrentOp.checkForInterrupt(); - dbtemprelease tl; + + { + dbtempreleasecond tl; + if ( ! tl.unlocked() ) + log( LL_WARNING ) << "map/reduce can't temp release" << endl; + state.finalReduce( all ); + } + + pm.finished(); } - state.finalReduce( all ); - pm.finished(); + _tlmr.reset( 0 );
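
The 3/3 phase above relies on the { "0" : 1 } index on the inc collection: all values for one key arrive adjacent in the sorted scan, so the loop buffers a run of equal keys and reduces the run when the key changes, yielding its lock every so often so other operations can proceed. A sketch of the grouping logic, with a summing reduce in place of the JavaScript function:

    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for one spilled emit; illustration only.
    struct Emitted { std::string key; long value; };

    long reduceRun(const std::vector<Emitted>& run) {
        long sum = 0;
        for (const Emitted& e : run) sum += e.value;
        return sum;
    }

    int main() {
        // Pre-sorted by key, as the index on the inc collection guarantees.
        std::vector<Emitted> sorted = {
            {"apple", 2}, {"apple", 3}, {"pear", 1}, {"pear", 4}, {"pear", 5},
        };
        std::vector<Emitted> run;
        for (const Emitted& e : sorted) {
            if (!run.empty() && run.back().key != e.key) {
                std::cout << run.back().key << " -> " << reduceRun(run) << '\n';
                run.clear();                 // like all.clear(); prev = o;
            }
            run.push_back(e);
            // The real loop also yields its read lock roughly every 1000
            // records; a failed yield abandons the cursor, as above.
        }
        if (!run.empty())
            std::cout << run.back().key << " -> " << reduceRun(run) << '\n';
    }

} catch ( ...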
){ @@ -525,9 +577,13 @@ throw; } - db.dropCollection( mr.incLong ); + long long finalCount = 0; + { + dblock lock; + db.dropCollection( mr.incLong ); - long long finalCount = mr.renameIfNeeded( db ); + finalCount = mr.renameIfNeeded( db ); + } timingBuilder.append( "total" , t.millis() ); @@ -554,13 +610,10 @@ class MapReduceFinishCommand : public Command { public: MapReduceFinishCommand() : Command( "mapreduce.shardedfinish" ){} - virtual bool slaveOk() { return true; } + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return WRITE; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbname = cc().database()->name; // this has to come before dbtemprelease - dbtemprelease temprelease; // we don't touch the db directly - + virtual LockType locktype() const { return NONE; } + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe(); MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false ); @@ -572,71 +625,80 @@ BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck(); vector< auto_ptr<DBClientCursor> > shardCursors; - BSONObjIterator i( shards ); - while ( i.more() ){ - BSONElement e = i.next(); - string shard = e.fieldName(); - - BSONObj res = e.embeddedObjectUserCheck(); - - uassert( 10078 , "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() ); - servers.insert( shard ); - shardCounts.appendAs( res["counts"] , shard.c_str() ); - BSONObjIterator j( res["counts"].embeddedObjectUserCheck() ); - while ( j.more() ){ - BSONElement temp = j.next(); - counts[temp.fieldName()] += temp.numberLong(); + { // parse per shard results + BSONObjIterator i( shards ); + while ( i.more() ){ + BSONElement e = i.next(); + string shard = e.fieldName(); + + BSONObj res = e.embeddedObjectUserCheck(); + + uassert( 10078 , "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() ); + servers.insert( shard ); + shardCounts.appendAs( res["counts"] , shard.c_str() ); + + BSONObjIterator j( res["counts"].embeddedObjectUserCheck() ); + while ( j.more() ){ + BSONElement temp = j.next(); + counts[temp.fieldName()] += temp.numberLong(); + } + } - + } - - BSONObj sortKey = BSON( "_id" << 1 ); - - ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection , - Query().sort( sortKey ) ); - - auto_ptr<Scope> s = globalScriptEngine->getPooledScope( ns ); - ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() ); - ScriptingFunction finalizeFunction = 0; - if ( mr.finalizeCode.size() ) - finalizeFunction = s->createFunction( mr.finalizeCode.c_str() ); - - BSONList values; - - result.append( "result" , mr.finalShort ); - DBDirectClient db; - - while ( cursor.more() ){ - BSONObj t = cursor.next().getOwned(); - - if ( values.size() == 0 ){ - values.push_back( t ); - continue; - } - if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){ + { // reduce from each stream + + BSONObj sortKey = BSON( "_id" << 1 ); + + ParallelSortClusteredCursor cursor( servers , dbname + "."
+ shardedOutputCollection , + Query().sort( sortKey ) ); + cursor.init(); + + auto_ptr<Scope> s = globalScriptEngine->getPooledScope( dbname ); + s->localConnect( dbname.c_str() ); + ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() ); + ScriptingFunction finalizeFunction = 0; + if ( mr.finalizeCode.size() ) + finalizeFunction = s->createFunction( mr.finalizeCode.c_str() ); + + BSONList values; + + result.append( "result" , mr.finalShort ); + + while ( cursor.more() ){ + BSONObj t = cursor.next().getOwned(); + + if ( values.size() == 0 ){ + values.push_back( t ); + continue; + } + + if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){ + values.push_back( t ); + continue; + } + + + db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); + values.clear(); + values.push_back( t ); - continue; } - - db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); - values.clear(); - values.push_back( t ); + if ( values.size() ) + db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); } - if ( values.size() ) - db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); - long long finalCount = mr.renameIfNeeded( db ); log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl; for ( set<ServerAndQuery>::iterator i=servers.begin(); i!=servers.end(); i++ ){ ScopedDbConnection conn( i->_server ); conn->dropCollection( dbname + "." + shardedOutputCollection ); + conn.done(); } result.append( "shardCounts" , shardCounts.obj() ); diff -Nru mongodb-1.4.4/db/namespace.cpp mongodb-1.6.3/db/namespace.cpp --- mongodb-1.4.4/db/namespace.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/namespace.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" #include "db.h" #include "../util/mmap.h" @@ -42,8 +42,34 @@ 0x400000, 0x800000 };
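
The bucketSizes table ending just above drives the deleted-record free lists: each freed record is filed under a bucket by size, so an allocation can start its search in a right-sized list instead of walking everything. A sketch of that bucket selection; the leading table entries are assumed here (the hunk only shows the last two), and the comparison rule is illustrative:

    #include <iostream>

    static const int kBucketSizes[] = {
        0x20,     0x40,     0x80,     0x100,    0x200,    0x400,
        0x800,    0x1000,   0x2000,   0x4000,   0x8000,   0x10000,
        0x20000,  0x40000,  0x80000,  0x100000, 0x200000,
        0x400000, 0x800000
    };
    static const int kBuckets = sizeof(kBucketSizes) / sizeof(kBucketSizes[0]);

    // Return the first bucket whose nominal size exceeds len; oversized
    // records all land in the last bucket.
    int bucketFor(int len) {
        for (int i = 0; i < kBuckets; ++i)
            if (kBucketSizes[i] > len)
                return i;
        return kBuckets - 1;
    }

    int main() {
        std::cout << bucketFor(100) << ' '    // 2: first size > 100 is 0x80 = 128
                  << bucketFor(0x20) << ' '   // 1: 0x20 is not > 0x20
                  << std::hex << kBucketSizes[bucketFor(100)] << '\n';
    }

+ NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) { + /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ + firstExtent = lastExtent = capExtent = loc; + datasize = nrecords = 0; + lastExtentSize = 0; + nIndexes = 0; + capped = _capped; + max = 0x7fffffff; + paddingFactor = 1.0; + flags = 0; + capFirstNewRecord = DiskLoc(); + // Signal that we are on first allocation iteration through extents. + capFirstNewRecord.setInvalid(); + // For capped case, signal that we are doing initial extent allocation.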
+ if ( capped ) + cappedLastDelRecLastExtent().setInvalid(); + assert( sizeof(dataFileVersion) == 2 ); + dataFileVersion = 0; + indexFileVersion = 0; + multiKeyIndexBits = 0; + reservedA = 0; + extraOffset = 0; + backgroundIndexBuildInProgress = 0; + memset(reserved, 0, sizeof(reserved)); + } + bool NamespaceIndex::exists() const { - return !boost::filesystem::exists(path()); + return !MMF::exists(path()); } boost::filesystem::path NamespaceIndex::path() const { @@ -78,7 +104,7 @@ } } - static void callback(const Namespace& k, NamespaceDetails& v) { + static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { v.onLoad(k); } @@ -100,10 +126,10 @@ int len = -1; boost::filesystem::path nsPath = path(); string pathString = nsPath.string(); - void *p; - if( boost::filesystem::exists(nsPath) ) { + MMF::Pointer p; + if( MMF::exists(nsPath) ) { p = f.map(pathString.c_str()); - if( p ) { + if( !p.isNull() ) { len = f.length(); if ( len % (1024*1024) != 0 ){ log() << "bad .ns file: " << pathString << endl; @@ -117,22 +143,38 @@ maybeMkdir(); long l = lenForNewNsFiles; p = f.map(pathString.c_str(), l); - if( p ) { + if( !p.isNull() ) { len = (int) l; assert( len == lenForNewNsFiles ); } } - if ( p == 0 ) { + if ( p.isNull() ) { problem() << "couldn't open file " << pathString << " terminating" << endl; dbexit( EXIT_FS ); } - ht = new HashTable<Namespace,NamespaceDetails>(p, len, "namespace index"); + + ht = new HashTable<Namespace,NamespaceDetails>(p, len, "namespace index"); if( checkNsFilesOnLoad ) - ht->iterAll(callback); + ht->iterAll(namespaceOnLoadCallback); + } + + static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) { + list<string> * l = (list<string>*)extra; + if ( ! k.hasDollarSign() ) + l->push_back( (string)k ); + } + + void NamespaceIndex::getNamespaces( list<string>& tofill , bool onlyCollections ) const { + assert( onlyCollections ); // TODO: need to implement this + // need boost::bind or something to make this less ugly + + if ( ht ) + ht->iterAll( namespaceGetNamespacesCallback , (void*)&tofill ); } void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) { + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); { // defensive code: try to make us notice if we reference a deleted record (unsigned&) (((Record *) d)->data) = 0xeeeeeeee; @@ -140,19 +182,20 @@ dassert( dloc.drec() == d ); DEBUGGING out() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; if ( capped ) { - if ( !deletedList[ 1 ].isValid() ) { // Initial extent allocation. Insert at end. d->nextDeleted = DiskLoc(); - if ( deletedList[ 0 ].isNull() ) - deletedList[ 0 ] = dloc; + if ( !cappedLastDelRecLastExtent().isValid() ) { // Initial extent allocation. Insert at end. d->nextDeleted = DiskLoc(); + if ( cappedListOfAllDeletedRecords().isNull() ) + cappedListOfAllDeletedRecords() = dloc; else { - DiskLoc i = deletedList[ 0 ]; + DiskLoc i = cappedListOfAllDeletedRecords(); for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ); i.drec()->nextDeleted = dloc; } } else { - d->nextDeleted = firstDeletedInCapExtent(); - firstDeletedInCapExtent() = dloc; + d->nextDeleted = cappedFirstDeletedInCurExtent(); + cappedFirstDeletedInCurExtent() = dloc; + // always compact() after this so order doesn't matter } } else { int b = bucket(d->lengthWithHeaders); @@ -186,15 +229,17 @@ if ( capped == 0 ) { if ( left < 24 || left < (lenToAlloc >> 3) ) { // you get the whole thing. + DataFileMgr::grow(loc, regionlen); return loc; } }
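
The allocation path above hands the caller the entire free region when the leftover would be a useless sliver: under 24 bytes, or under 1/8 of the request. Otherwise it splits, as the code after this continues. A standalone sketch of that split decision, with a plain offset/length pair standing in for DiskLoc and DeletedRecord:

    #include <iostream>

    struct FreeBlock { int offset; int length; };

    // Carve lenToAlloc from a free region; *leftover gets a valid block only
    // when a split actually happens (same thresholds as the code above).
    FreeBlock allocateFrom(FreeBlock region, int lenToAlloc, FreeBlock* leftover) {
        int left = region.length - lenToAlloc;
        if (left < 24 || left < (lenToAlloc >> 3)) {
            *leftover = FreeBlock{0, 0};     // absorb the sliver: whole region used
            return region;
        }
        *leftover = FreeBlock{region.offset + lenToAlloc, left};
        return FreeBlock{region.offset, lenToAlloc};
    }

    int main() {
        FreeBlock rest;
        FreeBlock got = allocateFrom(FreeBlock{1000, 512}, 200, &rest);
        std::cout << "alloc @" << got.offset << " len " << got.length
                  << ", leftover @" << rest.offset << " len " << rest.length << '\n';
        // alloc @1000 len 200, leftover @1200 len 312
    }

/* split off some for further use.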
*/ r->lengthWithHeaders = lenToAlloc; + DataFileMgr::grow(loc, lenToAlloc); DiskLoc newDelLoc = loc; newDelLoc.inc(lenToAlloc); - DeletedRecord *newDel = newDelLoc.drec(); + DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left); newDel->extentOfs = r->extentOfs; newDel->lengthWithHeaders = left; newDel->nextDeleted.Null(); @@ -298,53 +343,6 @@ } } - /* combine adjacent deleted records - - this is O(n^2) but we call it for capped tables where typically n==1 or 2! - (or 3...there will be a little unused sliver at the end of the extent.) - */ - void NamespaceDetails::compact() { - assert(capped); - - list<DiskLoc> drecs; - - // Pull out capExtent's DRs from deletedList - DiskLoc i = firstDeletedInCapExtent(); - for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted ) - drecs.push_back( i ); - firstDeletedInCapExtent() = i; - - // This is the O(n^2) part. - drecs.sort(); - - list<DiskLoc>::iterator j = drecs.begin(); - assert( j != drecs.end() ); - DiskLoc a = *j; - while ( 1 ) { - j++; - if ( j == drecs.end() ) { - DEBUGGING out() << "TEMP: compact adddelrec\n"; - addDeletedRec(a.drec(), a); - break; - } - DiskLoc b = *j; - while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) { - // a & b are adjacent. merge. - a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders; - j++; - if ( j == drecs.end() ) { - DEBUGGING out() << "temp: compact adddelrec2\n"; - addDeletedRec(a.drec(), a); - return; - } - b = *j; - } - DEBUGGING out() << "temp: compact adddelrec3\n"; - addDeletedRec(a.drec(), a); - a = b; - } - } - DiskLoc NamespaceDetails::firstRecord( const DiskLoc &startExtent ) const { for (DiskLoc i = startExtent.isNull() ? firstExtent : startExtent; !i.isNull(); i = i.ext()->xnext ) { @@ -363,47 +361,6 @@ return DiskLoc(); } - DiskLoc &NamespaceDetails::firstDeletedInCapExtent() { - if ( deletedList[ 1 ].isNull() ) - return deletedList[ 0 ]; - else - return deletedList[ 1 ].drec()->nextDeleted; - } - - bool NamespaceDetails::inCapExtent( const DiskLoc &dl ) const { - assert( !dl.isNull() ); - // We could have a rec or drec, doesn't matter. - return dl.drec()->myExtent( dl ) == capExtent.ext(); - } - - bool NamespaceDetails::nextIsInCapExtent( const DiskLoc &dl ) const { - assert( !dl.isNull() ); - DiskLoc next = dl.drec()->nextDeleted; - if ( next.isNull() ) - return false; - return inCapExtent( next ); - } - - void NamespaceDetails::advanceCapExtent( const char *ns ) { - // We want deletedList[ 1 ] to be the last DeletedRecord of the prev cap extent - // (or DiskLoc() if new capExtent == firstExtent) - if ( capExtent == lastExtent ) - deletedList[ 1 ] = DiskLoc(); - else { - DiskLoc i = firstDeletedInCapExtent(); - for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted ); - deletedList[ 1 ] = i; - } - - capExtent = theCapExtent()->xnext.isNull() ?
firstExtent : theCapExtent()->xnext; - /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */ - //dassert( theCapExtent()->ns == ns ); - - theCapExtent()->assertOk(); - capFirstNewRecord = DiskLoc(); - } - int n_complaints_cap = 0; void NamespaceDetails::maybeComplain( const char *ns, int len ) const { if ( ++n_complaints_cap < 8 ) { @@ -422,157 +379,84 @@ } } - DiskLoc NamespaceDetails::__capAlloc( int len ) { - DiskLoc prev = deletedList[ 1 ]; - DiskLoc i = firstDeletedInCapExtent(); - DiskLoc ret; - for (; !i.isNull() && inCapExtent( i ); prev = i, i = i.drec()->nextDeleted ) { - // We need to keep at least one DR per extent in deletedList[ 0 ], - // so make sure there's space to create a DR at the end. - if ( i.drec()->lengthWithHeaders >= len + 24 ) { - ret = i; - break; - } - } - - /* unlink ourself from the deleted list */ - if ( !ret.isNull() ) { - if ( prev.isNull() ) - deletedList[ 0 ] = ret.drec()->nextDeleted; - else - prev.drec()->nextDeleted = ret.drec()->nextDeleted; - ret.drec()->nextDeleted.setInvalid(); // defensive. - assert( ret.drec()->extentOfs < ret.getOfs() ); - } - - return ret; - } - - void NamespaceDetails::checkMigrate() { - // migrate old NamespaceDetails format - if ( capped && capExtent.a() == 0 && capExtent.getOfs() == 0 ) { - capFirstNewRecord = DiskLoc(); - capFirstNewRecord.setInvalid(); - // put all the DeletedRecords in deletedList[ 0 ] - for ( int i = 1; i < Buckets; ++i ) { - DiskLoc first = deletedList[ i ]; - if ( first.isNull() ) - continue; - DiskLoc last = first; - for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted ); - last.drec()->nextDeleted = deletedList[ 0 ]; - deletedList[ 0 ] = first; - deletedList[ i ] = DiskLoc(); - } - // NOTE deletedList[ 1 ] set to DiskLoc() in above - - // Last, in case we're killed before getting here - capExtent = firstExtent; - } - } - /* alloc with capped table handling. */ DiskLoc NamespaceDetails::_alloc(const char *ns, int len) { if ( !capped ) return __stdAlloc(len); - // capped. + return cappedAlloc(ns,len); + } - // signal done allocating new extents. - if ( !deletedList[ 1 ].isValid() ) - deletedList[ 1 ] = DiskLoc(); + /* extra space for indexes when more than 10 */ + NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) { + assert( i >= 0 && i <= 1 ); + Namespace n(ns); + Namespace extra(n.extraName(i).c_str()); // throws userexception if ns name too long - assert( len < 400000000 ); - int passes = 0; - int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog - if ( maxPasses < 5000 ){ - // this is for backwards safety since 5000 was the old value - maxPasses = 5000; - } - DiskLoc loc; - - // delete records until we have room and the max # objects limit achieved. - - /* this fails on a rename -- that is ok but must keep commented out */ - //assert( theCapExtent()->ns == ns ); - - theCapExtent()->assertOk(); - DiskLoc firstEmptyExtent; - while ( 1 ) { - if ( nrecords < max ) { - loc = __capAlloc( len ); - if ( !loc.isNull() ) - break; - } - - // If on first iteration through extents, don't delete anything. - if ( !capFirstNewRecord.isValid() ) { - advanceCapExtent( ns ); - if ( capExtent != firstExtent ) - capFirstNewRecord.setInvalid(); - // else signal done with first iteration through extents.
- continue; - } + massert( 10350 , "allocExtra: base ns missing?", d ); + massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); - if ( !capFirstNewRecord.isNull() && - theCapExtent()->firstRecord == capFirstNewRecord ) { - // We've deleted all records that were allocated on the previous - // iteration through this extent. - advanceCapExtent( ns ); - continue; - } - - if ( theCapExtent()->firstRecord.isNull() ) { - if ( firstEmptyExtent.isNull() ) - firstEmptyExtent = capExtent; - advanceCapExtent( ns ); - if ( firstEmptyExtent == capExtent ) { - maybeComplain( ns, len ); - return DiskLoc(); - } - continue; - } - - massert( 10344 , "Capped collection full and delete not allowed", cappedMayDelete() ); - DiskLoc fr = theCapExtent()->firstRecord; - theDataFileMgr.deleteRecord(ns, fr.rec(), fr, true); - compact(); - if( ++passes > maxPasses ) { - log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n'; - log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl; - massert( 10345 , "passes >= maxPasses in capped collection alloc", false ); - } + NamespaceDetails::Extra temp; + temp.init(); + uassert( 10082 , "allocExtra: too many namespaces/collections", ht->put(extra, (NamespaceDetails&) temp)); + NamespaceDetails::Extra *e = (NamespaceDetails::Extra *) ht->get(extra); + return e; + } + NamespaceDetails::Extra* NamespaceDetails::allocExtra(const char *ns, int nindexessofar) { + NamespaceIndex *ni = nsindex(ns); + int i = (nindexessofar - NIndexesBase) / NIndexesExtra; + Extra *e = ni->newExtra(ns, i, this); + long ofs = e->ofsFrom(this); + if( i == 0 ) { + assert( extraOffset == 0 ); + extraOffset = ofs; + assert( extra() == e ); + } + else { + Extra *hd = extra(); + assert( hd->next(this) == 0 ); + hd->setNext(ofs); } - - // Remember first record allocated on this iteration through capExtent. - if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() ) - capFirstNewRecord = loc; - - return loc; + return e; } /* you MUST call when adding an index. see pdfile.cpp */ IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) { assert( nsdetails(thisns) == this ); - if( nIndexes == NIndexesBase && extraOffset == 0 ) { - nsindex(thisns)->allocExtra(thisns); + IndexDetails *id; + try { + id = &idx(nIndexes,true); + } + catch(DBException&) { + allocExtra(thisns, nIndexes); + id = &idx(nIndexes,false); } - IndexDetails& id = idx(nIndexes); nIndexes++; if ( resetTransient ) NamespaceDetailsTransient::get_w(thisns).addedIndex(); - return id; + return *id; } // must be called when renaming a NS to fix up extra void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { - if( extraOffset ) { - extraOffset = 0; // so allocExtra() doesn't assert. - Extra *e = nsindex(thisns)->allocExtra(thisns); - memcpy(e, src->extra(), sizeof(Extra)); - } + extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below. + Extra *se = src->extra(); + int n = NIndexesBase; + if( se ) { + Extra *e = allocExtra(thisns, n); + while( 1 ) { + n += NIndexesExtra; + e->copy(this, *se); + se = se->next(src); + if( se == 0 ) break; + Extra *nxt = allocExtra(thisns, n); + e->setNext( nxt->ofsFrom(this) ); + e = nxt; + } + assert( extraOffset ); + } } /* returns index of the first index in which the field is present. -1 if not present. 
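
allocExtra() and copyingFrom() above are subtle because NamespaceDetails lives inside the memory-mapped .ns file: an Extra block cannot store a raw pointer to its successor, so it stores a byte offset relative to the owning NamespaceDetails (ofsFrom/setNext), which stays valid wherever the file happens to be mapped. A self-contained sketch of that offset-linked chain; field names and sizes are illustrative, not the on-disk layout:

    #include <iostream>

    // Blocks inside one mapped arena link to each other by offsets relative
    // to the header, so the chain survives mapping at any address.
    struct Header;
    struct Block {
        long long next;        // 0 = end of chain; else offset from header
        int payload[4];
        Block* nextBlock(Header* h);
    };
    struct Header {
        long long firstOfs;    // plays the role of NamespaceDetails::extraOffset
        Block* first() { return firstOfs ? (Block*)((char*)this + firstOfs) : 0; }
    };
    Block* Block::nextBlock(Header* h) {
        return next ? (Block*)((char*)h + next) : 0;
    }

    int main() {
        alignas(8) static char arena[4096] = {}; // stands in for the mapped .ns file
        Header* h = (Header*)arena;
        Block* a  = (Block*)(arena + 512);
        Block* b  = (Block*)(arena + 1024);
        h->firstOfs = (char*)a - (char*)h;       // like Extra::ofsFrom(this)
        a->next     = (char*)b - (char*)h;       // like Extra::setNext(ofs)
        a->payload[0] = 10; b->payload[0] = 11;

        for (Block* p = h->first(); p; p = p->nextBlock(h))
            std::cout << p->payload[0] << '\n';  // 10 then 11
    }

This is also why copyingFrom() must rebuild the chain after a rename: the copied struct's old offsets point into the source's neighborhood, not the destination's.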
@@ -610,8 +494,8 @@ /* ------------------------------------------------------------------------- */ - mongo::mutex NamespaceDetailsTransient::_qcMutex; - mongo::mutex NamespaceDetailsTransient::_isMutex; + mongo::mutex NamespaceDetailsTransient::_qcMutex("qc"); + mongo::mutex NamespaceDetailsTransient::_isMutex("is"); map< string, shared_ptr< NamespaceDetailsTransient > > NamespaceDetailsTransient::_map; typedef map< string, shared_ptr< NamespaceDetailsTransient > >::iterator ouriter; @@ -651,42 +535,6 @@ i.next().keyPattern().getFieldNames(_indexKeys); } - void NamespaceDetailsTransient::cllStart( int logSizeMb ) { - assertInWriteLock(); - _cll_ns = "local.temp.oplog." + _ns; - _cll_enabled = true; - stringstream spec; - // 128MB - spec << "{size:" << logSizeMb * 1024 * 1024 << ",capped:true,autoIndexId:false}"; - Client::Context ct( _cll_ns ); - string err; - massert( 10347 , "Could not create log ns", userCreateNS( _cll_ns.c_str(), fromjson( spec.str() ), err, false ) ); - NamespaceDetails *d = nsdetails( _cll_ns.c_str() ); - d->cappedDisallowDelete(); - } - - void NamespaceDetailsTransient::cllInvalidate() { - assertInWriteLock(); - cllDrop(); - _cll_enabled = false; - } - - bool NamespaceDetailsTransient::cllValidateComplete() { - assertInWriteLock(); - cllDrop(); - bool ret = _cll_enabled; - _cll_enabled = false; - _cll_ns = ""; - return ret; - } - - void NamespaceDetailsTransient::cllDrop() { - assertInWriteLock(); - if ( !_cll_enabled ) - return; - Client::Context ctx( _cll_ns ); - dropNS( _cll_ns ); - } /* ------------------------------------------------------------------------- */ @@ -789,6 +637,8 @@ } bool legalClientSystemNS( const string& ns , bool write ){ + if( ns == "local.system.replset" ) return true; + if ( ns.find( ".system.users" ) != string::npos ) return true; diff -Nru mongodb-1.4.4/db/namespace.h mongodb-1.6.3/db/namespace.h --- mongodb-1.4.4/db/namespace.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/namespace.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "queryutil.h" #include "diskloc.h" @@ -27,10 +27,6 @@ namespace mongo { - class Cursor; - -#pragma pack(1) - /* in the mongo source code, "client" means "database". */ const int MaxDatabaseLen = 256; // max str len for the db name, including null char @@ -55,6 +51,12 @@ nsToDatabase(ns, buf); return buf; } + inline string nsToDatabase(const string& ns) { + size_t i = ns.find( '.' ); + if ( i == string::npos ) + return ns; + return ns.substr( 0 , i ); + } /* e.g. 
NamespaceString ns("acme.orders"); @@ -84,6 +86,7 @@ } }; +#pragma pack(1) /* This helper class is used to make the HashMap below in NamespaceDetails */ class Namespace { public: @@ -99,26 +102,21 @@ } /* for more than 10 indexes -- see NamespaceDetails::Extra */ - string extraName() { - string s = string(buf) + "$extra"; - massert( 10348 , "ns name too long", s.size() < MaxNsLen); + string extraName(int i) { + char ex[] = "$extra"; + ex[5] += i; + string s = string(buf) + ex; + massert( 10348 , "$extra: ns name too long", s.size() < MaxNsLen); return s; } bool isExtra() const { - const char *p = strstr(buf, "$extra"); - return p && p[6] == 0; //==0 important in case an index uses name "$extra_1" for example - } - - void kill() { - buf[0] = 0x7f; - } - - bool operator==(const char *r) { - return strcmp(buf, r) == 0; - } - bool operator==(const Namespace& r) { - return strcmp(buf, r.buf) == 0; + const char *p = strstr(buf, "$extr"); + return p && p[5] && p[6] == 0; //==0 important in case an index uses name "$extra_1" for example } + bool hasDollarSign() const { return strchr( buf , '$' ) > 0; } + void kill() { buf[0] = 0x7f; } + bool operator==(const char *r) const { return strcmp(buf, r) == 0; } + bool operator==(const Namespace& r) const { return strcmp(buf, r.buf) == 0; } int hash() const { unsigned x = 0; const char *p = buf; @@ -141,26 +139,28 @@ return old + "." + local; } + string toString() const { + return (string)buf; + } + operator string() const { return (string)buf; } char buf[MaxNsLen]; }; +#pragma pack() -} +} // namespace mongo #include "index.h" namespace mongo { - /** - @return true if a client can modify this namespace - things like *.system.users - */ + /** @return true if a client can modify this namespace + things like *.system.users */ bool legalClientSystemNS( const string& ns , bool write ); - /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes so you can look for a deleterecord about the right size. */ @@ -169,6 +169,7 @@ extern int bucketSizes[]; +#pragma pack(1) /* this is the "header" for a collection that has all its details. in the .ns file. 
*/ class NamespaceDetails { @@ -176,75 +177,108 @@ enum { NIndexesExtra = 30, NIndexesBase = 10 }; - struct Extra { + public: + struct ExtraOld { // note we could use this field for more chaining later, so don't waste it: - unsigned long long reserved1; + unsigned long long reserved1; IndexDetails details[NIndexesExtra]; unsigned reserved2; unsigned reserved3; }; + class Extra { + long long _next; + public: + IndexDetails details[NIndexesExtra]; + private: + unsigned reserved2; + unsigned reserved3; + Extra(const Extra&) { assert(false); } + Extra& operator=(const Extra& r) { assert(false); return *this; } + public: + Extra() { } + long ofsFrom(NamespaceDetails *d) { + return ((char *) this) - ((char *) d); + } + void init() { memset(this, 0, sizeof(Extra)); } + Extra* next(NamespaceDetails *d) { + if( _next == 0 ) return 0; + return (Extra*) (((char *) d) + _next); + } + void setNext(long ofs) { _next = ofs; } + void copy(NamespaceDetails *d, const Extra& e) { + memcpy(this, &e, sizeof(Extra)); + _next = 0; + } + }; // Extra + Extra* extra() { - assert( extraOffset ); + if( extraOffset == 0 ) return 0; return (Extra *) (((char *) this) + extraOffset); } + public: + /* add extra space for indexes when more than 10 */ + Extra* allocExtra(const char *ns, int nindexessofar); + void copyingFrom(const char *thisns, NamespaceDetails *src); // must be called when renaming a NS to fix up extra - enum { NIndexesMax = 40 }; + enum { NIndexesMax = 64 }; - BOOST_STATIC_ASSERT( NIndexesMax == NIndexesBase + NIndexesExtra ); + BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ); + BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::ExtraOld) == 496 ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 ); /* called when loaded from disk */ void onLoad(const Namespace& k); - NamespaceDetails( const DiskLoc &loc, bool _capped ) { - /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ - firstExtent = lastExtent = capExtent = loc; - datasize = nrecords = 0; - lastExtentSize = 0; - nIndexes = 0; - capped = _capped; - max = 0x7fffffff; - paddingFactor = 1.0; - flags = 0; - capFirstNewRecord = DiskLoc(); - // Signal that we are on first allocation iteration through extents. - capFirstNewRecord.setInvalid(); - // For capped case, signal that we are doing initial extent allocation. - if ( capped ) - deletedList[ 1 ].setInvalid(); - assert( sizeof(dataFileVersion) == 2 ); - dataFileVersion = 0; - indexFileVersion = 0; - multiKeyIndexBits = 0; - reservedA = 0; - extraOffset = 0; - backgroundIndexBuildInProgress = 0; - memset(reserved, 0, sizeof(reserved)); - } + NamespaceDetails( const DiskLoc &loc, bool _capped ); + DiskLoc firstExtent; DiskLoc lastExtent; /* NOTE: capped collections override the meaning of deleted list. deletedList[0] points to a list of free records (DeletedRecord's) for all extents in - the namespace. + the capped namespace. deletedList[1] points to the last record in the prev extent. When the "current extent" changes, this value is updated. !deletedList[1].isValid() when this value is not yet computed. 
*/ DiskLoc deletedList[Buckets]; + void dumpExtents(); + long long datasize; long long nrecords; int lastExtentSize; int nIndexes; + private: IndexDetails _indexes[NIndexesBase]; + + private: + Extent *theCapExtent() const { return capExtent.ext(); } + void advanceCapExtent( const char *ns ); + DiskLoc __capAlloc(int len); + DiskLoc cappedAlloc(const char *ns, int len); + DiskLoc &cappedFirstDeletedInCurExtent(); + bool nextIsInCapExtent( const DiskLoc &dl ) const; public: + DiskLoc& cappedListOfAllDeletedRecords() { return deletedList[0]; } + DiskLoc& cappedLastDelRecLastExtent() { return deletedList[1]; } + void cappedDumpDelInfo(); + bool capLooped() const { return capped && capFirstNewRecord.isValid(); } + bool inCapExtent( const DiskLoc &dl ) const; + void cappedCheckMigrate(); + void cappedTruncateAfter(const char *ns, DiskLoc after, bool inclusive); /** remove rest of the capped collection from this point onward */ + void emptyCappedCollection(const char *ns); + int capped; - int max; // max # of objects for a capped table. + + int max; // max # of objects for a capped table. TODO: should this be 64 bit? double paddingFactor; // 1.0 = no padding. int flags; + DiskLoc capExtent; DiskLoc capFirstNewRecord; @@ -265,22 +299,35 @@ /* when a background index build is in progress, we don't count the index in nIndexes until complete, yet need to still use it in _indexRecord() - thus we use this function for that. */ - int nIndexesBeingBuilt() const { - return nIndexes + backgroundIndexBuildInProgress; - } + int nIndexesBeingBuilt() const { return nIndexes + backgroundIndexBuildInProgress; } /* NOTE: be careful with flags. are we manipulating them in read locks? if so, this isn't thread safe. TODO */ enum NamespaceFlags { - Flag_HaveIdIndex = 1 << 0, // set when we have _id index (ONLY if ensureIdIndex was called -- 0 if that has never been called) - Flag_CappedDisallowDelete = 1 << 1 // set when deletes not allowed during capped table allocation. + Flag_HaveIdIndex = 1 << 0 // set when we have _id index (ONLY if ensureIdIndex was called -- 0 if that has never been called) }; - IndexDetails& idx(int idxNo) { + IndexDetails& idx(int idxNo, bool missingExpected = false ) { if( idxNo < NIndexesBase ) return _indexes[idxNo]; - return extra()->details[idxNo-NIndexesBase]; + Extra *e = extra(); + if ( ! e ){ + if ( missingExpected ) + throw MsgAssertionException( 13283 , "Missing Extra" ); + massert(13282, "missing Extra", e); + } + int i = idxNo - NIndexesBase; + if( i >= NIndexesExtra ) { + e = e->next(this); + if ( ! e ){ + if ( missingExpected ) + throw MsgAssertionException( 13283 , "missing extra" ); + massert(13283, "missing Extra", e); + } + i -= NIndexesExtra; + } + return e->details[i]; } IndexDetails& backgroundIdx() { DEV assert(backgroundIndexBuildInProgress); @@ -292,28 +339,18 @@ int i; int n; NamespaceDetails *d; - Extra *e; IndexIterator(NamespaceDetails *_d) { d = _d; i = 0; n = d->nIndexes; - if( n > NIndexesBase ) - e = d->extra(); } public: int pos() { return i; } // note this is the next one to come bool more() { return i < n; } - IndexDetails& next() { - int k = i; - i++; - return k < NIndexesBase ? 
d->_indexes[k] : - e->details[k-10]; - }; + IndexDetails& next() { return d->idx(i++); } + }; // IndexIterator - IndexIterator ii() { - return IndexIterator(this); - } + IndexIterator ii() { return IndexIterator(this); } /* hackish - find our index # in the indexes array */ @@ -348,14 +385,8 @@ */ IndexDetails& addIndex(const char *thisns, bool resetTransient=true); - void aboutToDeleteAnIndex() { - flags &= ~Flag_HaveIdIndex; - } + void aboutToDeleteAnIndex() { flags &= ~Flag_HaveIdIndex; } - void cappedDisallowDelete() { - flags |= Flag_CappedDisallowDelete; - } - /* returns index of the first index in which the field is present. -1 if not present. */ int fieldIsIndexed(const char *fieldName); @@ -389,6 +420,14 @@ } return -1; } + + void findIndexByType( const string& name , vector<int>& matches ) { + IndexIterator i = ii(); + while ( i.more() ){ + if ( i.next().getSpec().getTypeName() == name ) + matches.push_back( i.pos() - 1 ); + } + } /* @return -1 = not found generally id is first index, so not that expensive an operation (assuming present). @@ -418,43 +457,25 @@ void dumpDeleted(set<DiskLoc> *extents = 0); - bool capLooped() const { - return capped && capFirstNewRecord.isValid(); - } - // Start from firstExtent by default. DiskLoc firstRecord( const DiskLoc &startExtent = DiskLoc() ) const; // Start from lastExtent by default. DiskLoc lastRecord( const DiskLoc &startExtent = DiskLoc() ) const; - bool inCapExtent( const DiskLoc &dl ) const; - - void checkMigrate(); - long long storageSize( int * numExtents = 0 ); - + private: - bool cappedMayDelete() const { - return !( flags & Flag_CappedDisallowDelete ); - } - Extent *theCapExtent() const { - return capExtent.ext(); - } - void advanceCapExtent( const char *ns ); + DiskLoc _alloc(const char *ns, int len); void maybeComplain( const char *ns, int len ) const; DiskLoc __stdAlloc(int len); - DiskLoc __capAlloc(int len); - DiskLoc _alloc(const char *ns, int len); void compact(); // combine adjacent deleted records - - DiskLoc &firstDeletedInCapExtent(); - bool nextIsInCapExtent( const DiskLoc &dl ) const; - }; - + }; // NamespaceDetails #pragma pack() - /* these are things we know / compute about a namespace that are transient -- things + /* NamespaceDetailsTransient + + these are things we know / compute about a namespace that are transient -- things we don't actually store in the .ns file. so mainly caching of frequently used information. @@ -465,13 +486,15 @@ todo: cleanup code, need abstractions and separation */ class NamespaceDetailsTransient : boost::noncopyable { + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 ); + /* general ------------------------------------------------------------- */ private: string _ns; void reset(); static std::map< string, shared_ptr< NamespaceDetailsTransient > > _map; public: - NamespaceDetailsTransient(const char *ns) : _ns(ns), _keysComputed(false), _qcWriteCount(), _cll_enabled() { } + NamespaceDetailsTransient(const char *ns) : _ns(ns), _keysComputed(false), _qcWriteCount(){ } /* _get() is not threadsafe -- see get_inlock() comments */ static NamespaceDetailsTransient& _get(const char *ns); /* use get_w() when doing write operations */ @@ -551,19 +574,6 @@ _qcCache[ pattern ] = make_pair( indexKey, nScanned ); }
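
The _qcCache line above is the per-namespace query plan cache: it remembers which index key served a given query shape and how many items it scanned, so the next query with the same shape reuses the plan instead of re-running the optimizer. A sketch of that cache keyed by a normalized pattern string; the real key is a QueryPattern built from the query and sort specs:

    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>

    // Per-namespace plan cache: query shape -> (index key pattern, nscanned).
    class PlanCacheSketch {
        std::map<std::string, std::pair<std::string, long long>> qcCache;
    public:
        void registerIndex(const std::string& pattern,
                           const std::string& indexKey, long long nScanned) {
            qcCache[pattern] = std::make_pair(indexKey, nScanned);
        }
        std::string indexForPattern(const std::string& pattern) const {
            std::map<std::string, std::pair<std::string, long long>>::const_iterator
                i = qcCache.find(pattern);
            return i == qcCache.end() ? std::string() : i->second.first;
        }
    };

    int main() {
        PlanCacheSketch cache;
        // After one optimizer run, remember that {a:1,b:1} won for this shape.
        cache.registerIndex("{a: eq, b: gt}", "{a: 1, b: 1}", 42);
        std::cout << cache.indexForPattern("{a: eq, b: gt}") << '\n'; // {a: 1, b: 1}
    }

- /* for collection-level logging -- see CmdLogCollection ----------------- */ - /* assumed to be in write lock for this */ - private: - string _cll_ns; // "local.temp.oplog."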
+ _ns; - bool _cll_enabled; - void cllDrop(); // drop _cll_ns - public: - string cllNS() const { return _cll_ns; } - bool cllEnabled() const { return _cll_enabled; } - void cllStart( int logSizeMb = 256 ); // begin collection level logging - void cllInvalidate(); - bool cllValidateComplete(); - }; /* NamespaceDetailsTransient */ inline NamespaceDetailsTransient& NamespaceDetailsTransient::_get(const char *ns) { @@ -578,23 +588,20 @@ */ class NamespaceIndex { friend class NamespaceCursor; - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + public: NamespaceIndex(const string &dir, const string &database) : - ht( 0 ), - dir_( dir ), - database_( database ) {} + ht( 0 ), dir_( dir ), database_( database ) {} /* returns true if new db will be created if we init lazily */ bool exists() const; - + void init(); void add_ns(const char *ns, DiskLoc& loc, bool capped) { NamespaceDetails details( loc, capped ); add_ns( ns, details ); } - void add_ns( const char *ns, const NamespaceDetails &details ) { init(); Namespace n(ns); @@ -602,36 +609,19 @@ } /* just for diagnostics */ - size_t detailsOffset(NamespaceDetails *d) { + /*size_t detailsOffset(NamespaceDetails *d) { if ( !ht ) return -1; return ((char *) d) - (char *) ht->nodes; - } - - /* extra space for indexes when more than 10 */ - NamespaceDetails::Extra* allocExtra(const char *ns) { - Namespace n(ns); - Namespace extra(n.extraName().c_str()); // throws userexception if ns name too long - NamespaceDetails *d = details(ns); - massert( 10350 , "allocExtra: base ns missing?", d ); - assert( d->extraOffset == 0 ); - massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); - NamespaceDetails::Extra temp; - memset(&temp, 0, sizeof(temp)); - uassert( 10082 , "allocExtra: too many namespaces/collections", ht->put(extra, (NamespaceDetails&) temp)); - NamespaceDetails::Extra *e = (NamespaceDetails::Extra *) ht->get(extra); - d->extraOffset = ((char *) e) - ((char *) d); - assert( d->extra() == e ); - return e; - } + }*/ NamespaceDetails* details(const char *ns) { if ( !ht ) return 0; Namespace n(ns); NamespaceDetails *d = ht->get(n); - if ( d ) - d->checkMigrate(); + if ( d && d->capped ) + d->cappedCheckMigrate(); return d; } @@ -641,11 +631,13 @@ Namespace n(ns); ht->kill(n); - try { - Namespace extra(n.extraName().c_str()); - ht->kill(extra); + for( int i = 0; i<=1; i++ ) { + try { + Namespace extra(n.extraName(i).c_str()); + ht->kill(extra); + } + catch(DBException&) { } } - catch(DBException&) { } } bool find(const char *ns, DiskLoc& loc) { @@ -661,12 +653,17 @@ return ht != 0; } - private: + void getNamespaces( list<string>& tofill , bool onlyCollections = true ) const; + + NamespaceDetails::Extra* newExtra(const char *ns, int n, NamespaceDetails *d); + boost::filesystem::path path() const; + private: + void maybeMkdir() const; - MemoryMappedFile f; - HashTable<Namespace,NamespaceDetails> *ht; + MMF f; + HashTable<Namespace,NamespaceDetails> *ht; string dir_; string database_; }; diff -Nru mongodb-1.4.4/db/nonce.cpp mongodb-1.6.3/db/nonce.cpp --- mongodb-1.4.4/db/nonce.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/nonce.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License.
*/ -#include "stdafx.h" +#include "pch.h" #include "nonce.h" extern int do_md5_test(void); @@ -32,7 +32,7 @@ if( _initialized ) return; _initialized = true; -#if defined(__linux__) +#if defined(__linux__) || defined(__sunos__) _devrandom = new ifstream("/dev/urandom", ios::binary|ios::in); massert( 10353 , "can't open dev/urandom", _devrandom->is_open() ); #elif defined(_WIN32) @@ -49,7 +49,7 @@ } nonce Security::getNonce(){ - static mongo::mutex m; + static mongo::mutex m("getNonce"); scoped_lock lk(m); /* question/todo: /dev/random works on OS X. is it better */ nonce n; -#if defined(__linux__) +#if defined(__linux__) || defined(__sunos__) _devrandom->read((char*)&n, sizeof(n)); massert( 10355 , "devrandom failed", !_devrandom->fail()); #elif defined(_WIN32) @@ -67,6 +67,7 @@ #endif return n; } + unsigned getRandomNumber() { return (unsigned) security.getNonce(); } bool Security::_initialized; Security security; diff -Nru mongodb-1.4.4/db/oplog.cpp mongodb-1.6.3/db/oplog.cpp --- mongodb-1.4.4/db/oplog.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/oplog.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,603 @@ +// @file oplog.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "oplog.h" +#include "repl_block.h" +#include "repl.h" +#include "commands.h" +#include "repl/rs.h" + +namespace mongo { + + void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ); + + int __findingStartInitialTimeout = 5; // configurable for testing + + // cached copies of these...so don't rename them, drop them, etc.!!! + static NamespaceDetails *localOplogMainDetails = 0; + static Database *localDB = 0; + static NamespaceDetails *rsOplogDetails = 0; + void oplogCheckCloseDatabase( Database * db ){ + localDB = 0; + localOplogMainDetails = 0; + rsOplogDetails = 0; + resetSlaveCache(); + } + + static void _logOpUninitialized(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { + uassert(13288, "replSet error write op to db before replSet initialized", str::startsWith(ns, "local.") || *opstr == 'n'); + } + + /** write an op to the oplog that is already built. + todo : make _logOpRS() call this so we don't repeat ourself? + */ + void _logOpObjRS(const BSONObj& op) { + DEV assertInWriteLock(); + + const OpTime ts = op["ts"]._opTime(); + long long h = op["h"].numberLong(); + + { + const char *logns = rsoplog; + if ( rsOplogDetails == 0 ) { + Client::Context ctx( logns , dbpath, 0, false); + localDB = ctx.db(); + assert( localDB ); + rsOplogDetails = nsdetails(logns); + massert(13389, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails); + } + Client::Context ctx( "" , localDB, false ); + { + int len = op.objsize(); + Record *r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len); + memcpy(r->data, op.objdata(), len); + }
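
The op["ts"]._opTime() read above depends on the OpTime layout used throughout this code: a 64-bit value carrying the wall-clock seconds in the high 32 bits and a per-second increment in the low 32 bits, so plain integer comparison orders operations correctly. A sketch of that packing; names here are illustrative, not the real OpTime class:

    #include <cstdint>
    #include <iostream>

    // OpTime-style packing: seconds high, increment low, so ordinary 64-bit
    // comparison orders ops by time first, then by arrival order.
    struct OpTimeSketch {
        uint32_t secs, inc;
        uint64_t asULL() const { return (uint64_t(secs) << 32) | inc; }
        static OpTimeSketch fromULL(uint64_t v) {
            return OpTimeSketch{ uint32_t(v >> 32), uint32_t(v) };
        }
        bool operator<(const OpTimeSketch& r) const { return asULL() < r.asULL(); }
    };

    int main() {
        OpTimeSketch a{1285340562, 1}, b{1285340562, 2};
        std::cout << (a < b) << '\n';                       // 1: same second, later inc
        OpTimeSketch back = OpTimeSketch::fromULL(b.asULL());
        std::cout << back.secs << ' ' << back.inc << '\n';  // round-trips cleanly
    }

+ /* todo: now() has code to handle clock skew.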
but if the skew server to server is large it will get unhappy. + this code (or code in now() maybe) should be improved. + */ + if( theReplSet ) { + if( !(theReplSet->lastOpTimeWritten < ts) ) { + log() << "replSet error possible failover clock skew issue? " << theReplSet->lastOpTimeWritten.toString() << ' ' << endl; + } + theReplSet->lastOpTimeWritten = ts; + theReplSet->lastH = h; + ctx.getClient()->setLastOp( ts.asDate() ); + } + } + } + + static void _logOpRS(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { + DEV assertInWriteLock(); + static BufBuilder bufbuilder(8*1024); + + if ( strncmp(ns, "local.", 6) == 0 ){ + if ( strncmp(ns, "local.slaves", 12) == 0 ) + resetSlaveCache(); + return; + } + + const OpTime ts = OpTime::now(); + + long long hNew; + if( theReplSet ) { + massert(13312, "replSet error : logOp() but not primary?", theReplSet->box.getState().primary()); + hNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId(); + } + else { + // must be initiation + assert( *ns == 0 ); + hNew = 0; + } + + /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- + instead we do a single copy to the destination position in the memory mapped file. + */ + + bufbuilder.reset(); + BSONObjBuilder b(bufbuilder); + + b.appendTimestamp("ts", ts.asDate()); + b.append("h", hNew); + + b.append("op", opstr); + b.append("ns", ns); + if ( bb ) + b.appendBool("b", *bb); + if ( o2 ) + b.append("o2", *o2); + BSONObj partial = b.done(); + int posz = partial.objsize(); + int len = posz + obj.objsize() + 1 + 2 /*o:*/; + + Record *r; + DEV assert( logNS == 0 ); + { + const char *logns = rsoplog; + if ( rsOplogDetails == 0 ) { + Client::Context ctx( logns , dbpath, 0, false); + localDB = ctx.db(); + assert( localDB ); + rsOplogDetails = nsdetails(logns); + massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails); + } + Client::Context ctx( "" , localDB, false ); + r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len); + /* todo: now() has code to handle clock skew. but if the skew server to server is large it will get unhappy. + this code (or code in now() maybe) should be improved. + */ + if( theReplSet ) { + if( !(theReplSet->lastOpTimeWritten < ts) ) { + log() << "replSet error possible failover clock skew issue? " << theReplSet->lastOpTimeWritten << ' ' << ts << rsLog; + log() << "replSet " << theReplSet->isPrimary() << rsLog; + } + theReplSet->lastOpTimeWritten = ts; + theReplSet->lastH = hNew; + ctx.getClient()->setLastOp( ts.asDate() ); + } + } + + char *p = r->data; + memcpy(p, partial.objdata(), posz); + *((unsigned *)p) += obj.objsize() + 1 + 2; + p += posz - 1; + *p++ = (char) Object; + *p++ = 'o'; + *p++ = 0; + memcpy(p, obj.objdata(), obj.objsize()); + p += obj.objsize(); + *p = EOO; + + if ( logLevel >= 6 ) { + BSONObj temp(r); + log( 6 ) << "logOp:" << temp << endl; + } + } + + /* we write to local.oplog.$main: + { ts : ..., op: ..., ns: ..., o: ... } + ts: an OpTime timestamp + op: + "i" insert + "u" update + "d" delete + "c" db cmd + "db" declares presence of a database (ns is set to the db name + '.') + "n" no op + logNS - where to log it. 0/null means "local.oplog.$main". + bb: + if not null, specifies a boolean to pass along to the other side as b: param. + used for "justOne" or "upsert" flags on 'd', 'u' + first: true + when set, indicates this is the first thing we have logged for this database. + thus, the slave does not need to copy down all the data when it sees this. + + note this is used for single collection logging even when --replSet is enabled.
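For intuition, a few entries as they would appear in the oplog -- values invented for illustration, field meanings as described above:
    { ts : Timestamp 1285347000000|1, op : "i", ns : "test.foo", o : { _id : 1, x : 100 } }                                  an insert
    { ts : Timestamp 1285347002000|3, op : "u", ns : "test.foo", o2 : { _id : 1 }, o : { $set : { x : 101 } }, b : true }    an update; o2 is the match pattern, b the upsert flag
    { ts : Timestamp 1285347009000|1, op : "n", ns : "", o : { msg : "text" } }                                              a no-op comment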
+ */ + static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { + DEV assertInWriteLock(); + static BufBuilder bufbuilder(8*1024); + + if ( strncmp(ns, "local.", 6) == 0 ){ + if ( strncmp(ns, "local.slaves", 12) == 0 ){ + resetSlaveCache(); + } + return; + } + + const OpTime ts = OpTime::now(); + Client::Context context; + + /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- + instead we do a single copy to the destination position in the memory mapped file. + */ + + bufbuilder.reset(); + BSONObjBuilder b(bufbuilder); + b.appendTimestamp("ts", ts.asDate()); + b.append("op", opstr); + b.append("ns", ns); + if ( bb ) + b.appendBool("b", *bb); + if ( o2 ) + b.append("o2", *o2); + BSONObj partial = b.done(); + int posz = partial.objsize(); + int len = posz + obj.objsize() + 1 + 2 /*o:*/; + + Record *r; + if( logNS == 0 ) { + logNS = "local.oplog.$main"; + if ( localOplogMainDetails == 0 ) { + Client::Context ctx( logNS , dbpath, 0, false); + localDB = ctx.db(); + assert( localDB ); + localOplogMainDetails = nsdetails(logNS); + assert( localOplogMainDetails ); + } + Client::Context ctx( "" , localDB, false ); + r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len); + } else { + Client::Context ctx( logNS, dbpath, 0, false ); + assert( nsdetails( logNS ) ); + r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len); + } + + char *p = r->data; + memcpy(p, partial.objdata(), posz); + *((unsigned *)p) += obj.objsize() + 1 + 2; + p += posz - 1; + *p++ = (char) Object; + *p++ = 'o'; + *p++ = 0; + memcpy(p, obj.objdata(), obj.objsize()); + p += obj.objsize(); + *p = EOO; + + context.getClient()->setLastOp( ts.asDate() ); + + if ( logLevel >= 6 ) { + BSONObj temp(r); + log( 6 ) << "logging op:" << temp << endl; + } + + } + + static void (*_logOp)(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) = _logOpOld; + void newReplUp() { + replSettings.master = true; + _logOp = _logOpRS; + } + void newRepl() { + replSettings.master = true; + _logOp = _logOpUninitialized; + } + void oldRepl() { _logOp = _logOpOld; } + + void logKeepalive() { + _logOp("n", "", 0, BSONObj(), 0, 0); + } + void logOpComment(const BSONObj& obj) { + _logOp("n", "", 0, obj, 0, 0); + } + void logOpInitiate(const BSONObj& obj) { + _logOpRS("n", "", 0, obj, 0, 0); + } + + /*@ @param opstr: + c userCreateNS + i insert + n no-op / keepalive + d delete / remove + u update + */ + void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt, bool *b) { + if ( replSettings.master ) { + _logOp(opstr, ns, 0, obj, patt, b); + // why? 
: + //char cl[ 256 ]; + //nsToDatabase( ns, cl ); + } + + logOpForSharding( opstr , ns , obj , patt ); + } + + void createOplog() { + dblock lk; + + const char * ns = "local.oplog.$main"; + + bool rs = !cmdLine._replSet.empty(); + if( rs ) + ns = rsoplog; + + Client::Context ctx(ns); + + NamespaceDetails * nsd = nsdetails( ns ); + + if ( nsd ) { + + if ( cmdLine.oplogSize != 0 ){ + int o = (int)(nsd->storageSize() / ( 1024 * 1024 ) ); + int n = (int)(cmdLine.oplogSize / ( 1024 * 1024 ) ); + if ( n != o ){ + stringstream ss; + ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog"; + log() << ss.str() << endl; + throw UserException( 13257 , ss.str() ); + } + } + + if( rs ) return; + + DBDirectClient c; + BSONObj lastOp = c.findOne( ns, Query().sort(reverseNaturalObj) ); + if ( !lastOp.isEmpty() ) { + OpTime::setLast( lastOp[ "ts" ].date() ); + } + return; + } + + /* create an oplog collection, if it doesn't yet exist. */ + BSONObjBuilder b; + double sz; + if ( cmdLine.oplogSize != 0 ) + sz = (double)cmdLine.oplogSize; + else { + /* not specified. pick a default size */ + sz = 50.0 * 1000 * 1000; + if ( sizeof(int *) >= 8 ) { +#if defined(__APPLE__) + // typically these are desktops (dev machines), so keep it smallish + sz = (256-64) * 1000 * 1000; +#else + sz = 990.0 * 1000 * 1000; + boost::intmax_t free = freeSpace(); //-1 if call not supported. + double fivePct = free * 0.05; + if ( fivePct > sz ) + sz = fivePct; +#endif + } + } + + log() << "******" << endl; + log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB... (use --oplogSize to change)" << endl; + + b.append("size", sz); + b.appendBool("capped", 1); + b.appendBool("autoIndexId", false); + + string err; + BSONObj o = b.done(); + userCreateNS(ns, o, err, false); + if( !rs ) + logOp( "n", "dummy", BSONObj() ); + + /* sync here so we don't get any surprising lag later when we try to sync */ + MemoryMappedFile::flushAll(true); + log() << "******" << endl; + } + + // ------------------------------------- + + struct TestOpTime { + TestOpTime() { + OpTime t; + for ( int i = 0; i < 10; i++ ) { + OpTime s = OpTime::now(); + assert( s != t ); + t = s; + } + OpTime q = t; + assert( q == t ); + assert( !(q != t) ); + } + } testoptime; + + int _dummy_z; + + void pretouchN(vector& v, unsigned a, unsigned b) { + DEV assert( !dbMutex.isWriteLocked() ); + + Client *c = &cc(); + if( c == 0 ) { + Client::initThread("pretouchN"); + c = &cc(); + } + + readlock lk(""); + for( unsigned i = a; i <= b; i++ ) { + const BSONObj& op = v[i]; + const char *which = "o"; + const char *opType = op.getStringField("op"); + if ( *opType == 'i' ) + ; + else if( *opType == 'u' ) + which = "o2"; + else + continue; + /* todo : other operations */ + + try { + BSONObj o = op.getObjectField(which); + BSONElement _id; + if( o.getObjectID(_id) ) { + const char *ns = op.getStringField("ns"); + BSONObjBuilder b; + b.append(_id); + BSONObj result; + Client::Context ctx( ns ); + if( Helpers::findById(cc(), ns, b.done(), result) ) + _dummy_z += result.objsize(); // touch + } + } + catch( DBException& e ) { + log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' ' << e.toString() << endl; + } + } + } + + void pretouchOperation(const BSONObj& op) { + + if( dbMutex.isWriteLocked() ) + return; // no point pretouching if write locked. not sure if this will ever fire, but just in case. 
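/* For context: ops are applied while holding the write lock, so a sync thread can first warm the pages a batch will touch under only a read lock by calling pretouchN over slices of the batch. A minimal sketch of such a driver -- pretouchAll and k are hypothetical, only pretouchN above is from this patch:

       void pretouchAll( vector<BSONObj>& ops, unsigned k ) {
           unsigned n = ops.size();
           boost::thread_group tg;
           for( unsigned t = 0; t < k; t++ ) {
               unsigned a = n * t / k, b = n * (t+1) / k;
               if( a < b ) // pretouchN takes an inclusive range [a,b]
                   tg.create_thread( boost::bind( pretouchN, boost::ref(ops), a, b - 1 ) );
           }
           tg.join_all(); // the _id lookup pages are now likely resident
       }
*/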
+ + const char *which = "o"; + const char *opType = op.getStringField("op"); + if ( *opType == 'i' ) + ; + else if( *opType == 'u' ) + which = "o2"; + else + return; + /* todo : other operations */ + + try { + BSONObj o = op.getObjectField(which); + BSONElement _id; + if( o.getObjectID(_id) ) { + const char *ns = op.getStringField("ns"); + BSONObjBuilder b; + b.append(_id); + BSONObj result; + readlock lk(ns); + Client::Context ctx( ns ); + if( Helpers::findById(cc(), ns, b.done(), result) ) + _dummy_z += result.objsize(); // touch + } + } + catch( DBException& ) { + log() << "ignoring assertion in pretouchOperation()" << endl; + } + } + + void applyOperation_inlock(const BSONObj& op){ + if( logLevel >= 6 ) + log() << "applying op: " << op << endl; + + assertInWriteLock(); + + OpDebug debug; + BSONObj o = op.getObjectField("o"); + const char *ns = op.getStringField("ns"); + // operation type -- see logOp() comments for types + const char *opType = op.getStringField("op"); + + if ( *opType == 'i' ) { + const char *p = strchr(ns, '.'); + if ( p && strcmp(p, ".system.indexes") == 0 ) { + // updates aren't allowed for indexes -- so we will do a regular insert. if index already + // exists, that is ok. + theDataFileMgr.insert(ns, (void*) o.objdata(), o.objsize()); + } + else { + // do upserts for inserts as we might get replayed more than once + BSONElement _id; + if( !o.getObjectID(_id) ) { + /* No _id. This will be very slow. */ + Timer t; + updateObjects(ns, o, o, true, false, false , debug ); + if( t.millis() >= 2 ) { + RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl; + } + } + else { + BSONObjBuilder b; + b.append(_id); + + /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */ + RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow + + /* todo : it may be better to do an insert here, and then catch the dup key exception and do update + then. very few upserts will not be inserts... + */ + updateObjects(ns, o, b.done(), true, false, false , debug ); + } + } + } + else if ( *opType == 'u' ) { + RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow + updateObjects(ns, o, op.getObjectField("o2"), /*upsert*/ op.getBoolField("b"), /*multi*/ false, /*logop*/ false , debug ); + } + else if ( *opType == 'd' ) { + if ( opType[1] == 0 ) + deleteObjects(ns, o, op.getBoolField("b")); + else + assert( opType[1] == 'b' ); // "db" advertisement + } + else if ( *opType == 'n' ) { + // no op + } + else if ( *opType == 'c' ){ + BufBuilder bb; + BSONObjBuilder ob; + _runCommands(ns, o, bb, ob, true, 0); + } + else { + stringstream ss; + ss << "unknown opType [" << opType << "]"; + throw MsgAssertionException( 13141 , ss.str() ); + } + + } + + class ApplyOpsCmd : public Command { + public: + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return WRITE; } + ApplyOpsCmd() : Command( "applyOps" ) {} + virtual void help( stringstream &help ) const { + help << "examples: { applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... 
} ] }"; + } + virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + + if ( cmdObj.firstElement().type() != Array ){ + errmsg = "ops has to be an array"; + return false; + } + + BSONObj ops = cmdObj.firstElement().Obj(); + + { // check input + BSONObjIterator i( ops ); + while ( i.more() ){ + BSONElement e = i.next(); + if ( e.type() == Object ) + continue; + errmsg = "op not an object: "; + errmsg += e.fieldName(); + return false; + } + } + + if ( cmdObj["preCondition"].type() == Array ){ + BSONObjIterator i( cmdObj["preCondition"].Obj() ); + while ( i.more() ){ + BSONObj f = i.next().Obj(); + + BSONObj realres = db.findOne( f["ns"].String() , f["q"].Obj() ); + + Matcher m( f["res"].Obj() ); + if ( ! m.matches( realres ) ){ + result.append( "got" , realres ); + result.append( "whatFailed" , f ); + errmsg = "pre-condition failed"; + return false; + } + } + } + + // apply + int num = 0; + BSONObjIterator i( ops ); + while ( i.more() ){ + BSONElement e = i.next(); + applyOperation_inlock( e.Obj() ); + num++; + } + + result.append( "applied" , num ); + + return true; + } + + DBDirectClient db; + + } applyOpsCmd; + +} diff -Nru mongodb-1.4.4/db/oplog.h mongodb-1.6.3/db/oplog.h --- mongodb-1.4.4/db/oplog.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/oplog.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,214 @@ +// oplog.h - writing to and reading from oplog + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +/* + + local.oplog.$main is the default +*/ + +#pragma once + +#include "pdfile.h" +#include "db.h" +#include "dbhelpers.h" +#include "query.h" +#include "queryoptimizer.h" +#include "../client/dbclient.h" +#include "../util/optime.h" + +namespace mongo { + + void createOplog(); + + void _logOpObjRS(const BSONObj& op); + + /** Write operation to the log (local.oplog.$main) + + @param opstr + "i" insert + "u" update + "d" delete + "c" db cmd + "n" no-op + "db" declares presence of a database (ns is set to the db name + '.') + + See _logOp() in oplog.cpp for more details. + */ + void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt = 0, bool *b = 0); + + void logKeepalive(); + + /** puts obj in the oplog as a comment (a no-op). Just for diags. + convention is + { msg : "text", ... 
} + */ + void logOpComment(const BSONObj& obj); + + void oplogCheckCloseDatabase( Database * db ); + + extern int __findingStartInitialTimeout; // configurable for testing + + class FindingStartCursor { + public: + FindingStartCursor( const QueryPlan & qp ) : + _qp( qp ), + _findingStart( true ), + _findingStartMode(), + _findingStartTimer( 0 ), + _findingStartCursor( 0 ) + { init(); } + bool done() const { return !_findingStart; } + shared_ptr cRelease() { return _c; } + void next() { + if ( !_findingStartCursor || !_findingStartCursor->c->ok() ) { + _findingStart = false; + _c = _qp.newCursor(); // on error, start from beginning + destroyClientCursor(); + return; + } + switch( _findingStartMode ) { + case Initial: { + if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { + _findingStart = false; // found first record out of query range, so scan normally + _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); + destroyClientCursor(); + return; + } + _findingStartCursor->c->advance(); + RARELY { + if ( _findingStartTimer.seconds() >= __findingStartInitialTimeout ) { + createClientCursor( startLoc( _findingStartCursor->c->currLoc() ) ); + _findingStartMode = FindExtent; + return; + } + } + return; + } + case FindExtent: { + if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { + _findingStartMode = InExtent; + return; + } + DiskLoc prev = prevLoc( _findingStartCursor->c->currLoc() ); + if ( prev.isNull() ) { // hit beginning, so start scanning from here + createClientCursor(); + _findingStartMode = InExtent; + return; + } + // There might be a more efficient implementation than creating new cursor & client cursor each time, + // not worrying about that for now + createClientCursor( prev ); + return; + } + case InExtent: { + if ( _matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { + _findingStart = false; // found first record in query range, so scan normally + _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); + destroyClientCursor(); + return; + } + _findingStartCursor->c->advance(); + return; + } + default: { + massert( 12600, "invalid _findingStartMode", false ); + } + } + } + bool prepareToYield() { + if ( _findingStartCursor ) { + return _findingStartCursor->prepareToYield( _yieldData ); + } + return true; + } + void recoverFromYield() { + if ( _findingStartCursor ) { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _findingStartCursor = 0; + } + } + } + private: + enum FindingStartMode { Initial, FindExtent, InExtent }; + const QueryPlan &_qp; + bool _findingStart; + FindingStartMode _findingStartMode; + auto_ptr< CoveredIndexMatcher > _matcher; + Timer _findingStartTimer; + ClientCursor * _findingStartCursor; + shared_ptr _c; + ClientCursor::YieldData _yieldData; + DiskLoc startLoc( const DiskLoc &rec ) { + Extent *e = rec.rec()->myExtent( rec ); + if ( !_qp.nsd()->capLooped() || ( e->myLoc != _qp.nsd()->capExtent ) ) + return e->firstRecord; + // Likely we are on the fresh side of capExtent, so return first fresh record. + // If we are on the stale side of capExtent, then the collection is small and it + // doesn't matter if we start the extent scan with capFirstNewRecord. 
+ return _qp.nsd()->capFirstNewRecord; + } + + // should never have an empty extent in the oplog, so don't worry about that case + DiskLoc prevLoc( const DiskLoc &rec ) { + Extent *e = rec.rec()->myExtent( rec ); + if ( _qp.nsd()->capLooped() ) { + if ( e->xprev.isNull() ) + e = _qp.nsd()->lastExtent.ext(); + else + e = e->xprev.ext(); + if ( e->myLoc != _qp.nsd()->capExtent ) + return e->firstRecord; + } else { + if ( !e->xprev.isNull() ) { + e = e->xprev.ext(); + return e->firstRecord; + } + } + return DiskLoc(); // reached beginning of collection + } + void createClientCursor( const DiskLoc &startLoc = DiskLoc() ) { + shared_ptr<Cursor> c = _qp.newCursor( startLoc ); + _findingStartCursor = new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()); + } + void destroyClientCursor() { + if ( _findingStartCursor ) { + ClientCursor::erase( _findingStartCursor->cursorid ); + _findingStartCursor = 0; + } + } + void init() { + // Use a ClientCursor here so we can release db mutex while scanning + // oplog (can take quite a while with large oplogs). + shared_ptr<Cursor> c = _qp.newReverseCursor(); + _findingStartCursor = new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()); + _findingStartTimer.reset(); + _findingStartMode = Initial; + BSONElement tsElt = _qp.originalQuery()[ "ts" ]; + massert( 13044, "no ts field in query", !tsElt.eoo() ); + BSONObjBuilder b; + b.append( tsElt ); + BSONObj tsQuery = b.obj(); + _matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey())); + } + }; + + void pretouchOperation(const BSONObj& op); + void pretouchN(vector<BSONObj>&, unsigned a, unsigned b); + + void applyOperation_inlock(const BSONObj& op); +} diff -Nru mongodb-1.4.4/db/oplogreader.h mongodb-1.6.3/db/oplogreader.h --- mongodb-1.4.4/db/oplogreader.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/oplogreader.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,109 @@ +/** @file oplogreader.h */ + +#pragma once + +#include "../client/dbclient.h" +#include "../client/constants.h" +#include "dbhelpers.h" + +namespace mongo { + + /* started abstracting out the querying of the primary/master's oplog still fairly awkward but a start.
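To make the intended call pattern concrete, a sketch of how a puller might drive it -- the host string and the handle() step are placeholders, the methods are the ones declared below:

       OplogReader r;
       if( r.connect("primary.example.net:27017") ) {
           r.tailingQueryGTE( "local.oplog.rs", lastOpTimeFetched );
           while( r.more() )
               handle( r.nextSafe() ); // apply or buffer each op
       }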
+ */ + class OplogReader { + auto_ptr<DBClientConnection> _conn; + auto_ptr<DBClientCursor> cursor; + public: + + OplogReader() { + DEV log() << "TEMP *** OplogReader()" << endl; + } + ~OplogReader() { + DEV log() << "TEMP *** ~OplogReader()" << endl; + } + + void resetCursor() { + DEV log() << "TEMP *** OplogReader::resetCursor" << endl; + cursor.reset(); + } + void resetConnection() { + DEV log() << "TEMP *** OplogReader::resetConnection" << endl; + cursor.reset(); + _conn.reset(); + } + DBClientConnection* conn() { return _conn.get(); } + BSONObj findOne(const char *ns, const Query& q) { + return conn()->findOne(ns, q); + } + + BSONObj getLastOp(const char *ns) { + return findOne(ns, Query().sort(reverseNaturalObj)); + } + + /* ok to call if already connected */ + bool connect(string hostname); + + void tailCheck() { + if( cursor.get() && cursor->isDead() ) { + log() << "repl: old cursor isDead, will initiate a new one" << endl; + resetCursor(); + } + } + + bool haveCursor() { return cursor.get() != 0; } + + void query(const char *ns, const BSONObj& query) { + assert( !haveCursor() ); + cursor = _conn->query(ns, query, 0, 0, 0, QueryOption_SlaveOk); + } + + void tailingQuery(const char *ns, const BSONObj& query) { + assert( !haveCursor() ); + log(2) << "repl: " << ns << ".find(" << query.toString() << ')' << endl; + cursor = _conn->query( ns, query, 0, 0, 0, + QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | + /* TODO: slaveok maybe shouldn't use? */ + QueryOption_AwaitData + ); + } + + void tailingQueryGTE(const char *ns, OpTime t) { + BSONObjBuilder q; + q.appendDate("$gte", t.asDate()); + BSONObjBuilder query; + query.append("ts", q.done()); + tailingQuery(ns, query.done()); + } + + bool more() { + assert( cursor.get() ); + return cursor->more(); + } + bool moreInCurrentBatch() { + assert( cursor.get() ); + return cursor->moreInCurrentBatch(); + } + + /* old mongod's can't do the await flag... */ + bool awaitCapable() { + return cursor->hasResultFlag(ResultFlag_AwaitCapable); + } + + void peek(vector<BSONObj>& v, int n) { + if( cursor.get() ) + cursor->peek(v,n); + } + + BSONObj nextSafe() { return cursor->nextSafe(); } + + BSONObj next() { + return cursor->next(); + } + + void putBack(BSONObj op) { + cursor->putBack(op); + } + }; + +} diff -Nru mongodb-1.4.4/db/pdfile.cpp mongodb-1.6.3/db/pdfile.cpp --- mongodb-1.4.4/db/pdfile.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/pdfile.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -24,7 +24,7 @@ _ disallow system* manipulations from the database. */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" #include "db.h" #include "../util/mmap.h" @@ -45,7 +45,16 @@ namespace mongo { - const int MaxExtentSize = 0x7ff00000; + bool inDBRepair = false; + struct doingRepair { + doingRepair(){ + assert( ! inDBRepair ); + inDBRepair = true; + } + ~doingRepair(){ + inDBRepair = false; + } + }; map<string,unsigned> BackgroundOperation::dbsInProg; set<string> BackgroundOperation::nsInProg; @@ -100,6 +109,7 @@ string dbpath = "/data/db/"; bool directoryperdb = false; string repairpath; + string pidfilepath; DataFileMgr theDataFileMgr; DatabaseHolder dbHolder; @@ -111,7 +121,7 @@ void ensureIdIndexForNewNs(const char *ns) { if ( ( strstr( ns, ".system."
) == 0 || legalClientSystemNS( ns , false ) ) && strstr( ns, ".$freelist" ) == 0 ){ - log( 1 ) << "adding _id index for new collection" << endl; + log( 1 ) << "adding _id index for collection " << ns << endl; ensureHaveIdIndex( ns ); } } @@ -145,29 +155,29 @@ sz = 1000000000; int z = ((int)sz) & 0xffffff00; assert( z > len ); - DEV log() << "initialExtentSize(" << len << ") returns " << z << endl; + //DEV tlog() << "initialExtentSize(" << len << ") returns " << z << endl; return z; } - bool _userCreateNS(const char *ns, const BSONObj& j, string& err) { + bool _userCreateNS(const char *ns, const BSONObj& options, string& err, bool *deferIdIndex) { if ( nsdetails(ns) ) { err = "collection already exists"; return false; } - log(1) << "create collection " << ns << ' ' << j << '\n'; + log(1) << "create collection " << ns << ' ' << options << '\n'; /* todo: do this only when we have allocated space successfully? or we could insert with a { ok: 0 } field and then go back and set to ok : 1 after we are done. */ bool isFreeList = strstr(ns, ".$freelist") != 0; if( !isFreeList ) - addNewNamespaceToCatalog(ns, j.isEmpty() ? 0 : &j); + addNewNamespaceToCatalog(ns, options.isEmpty() ? 0 : &options); long long size = initialExtentSize(128); - BSONElement e = j.getField("size"); + BSONElement e = options.getField("size"); if ( e.isNumber() ) { - size = (long long) e.number(); + size = e.numberLong(); size += 256; size &= 0xffffffffffffff00LL; } @@ -176,18 +186,18 @@ bool newCapped = false; int mx = 0; - e = j.getField("capped"); + e = options.getField("capped"); if ( e.type() == Bool && e.boolean() ) { newCapped = true; - e = j.getField("max"); + e = options.getField("max"); if ( e.isNumber() ) { - mx = (int) e.number(); + mx = e.numberInt(); } } // $nExtents just for debug/testing. We create '$nExtents' extents, // each of size 'size'. - e = j.getField( "$nExtents" ); + e = options.getField( "$nExtents" ); int nExtents = int( e.number() ); Database *database = cc().database(); if ( nExtents > 0 ) { @@ -201,7 +211,7 @@ } } else { while ( size > 0 ) { - int max = MongoDataFile::maxSize() - MDFHeader::headerSize(); + int max = MongoDataFile::maxSize() - DataFileHeader::HeaderSize; int desiredExtentSize = (int) (size > max ? max : size); Extent *e = database->allocExtent( ns, desiredExtentSize, newCapped ); size -= e->length; @@ -211,15 +221,22 @@ NamespaceDetails *d = nsdetails(ns); assert(d); - if ( j.getField( "autoIndexId" ).type() ) { - if ( j["autoIndexId"].trueValue() ){ - ensureIdIndexForNewNs( ns ); + bool ensure = false; + if ( options.getField( "autoIndexId" ).type() ) { + if ( options["autoIndexId"].trueValue() ){ + ensure = true; } } else { if ( !newCapped ) { - ensureIdIndexForNewNs( ns ); + ensure=true; } } + if( ensure ) { + if( deferIdIndex ) + *deferIdIndex = true; + else + ensureIdIndexForNewNs( ns ); + } if ( mx > 0 ) d->max = mx; @@ -227,23 +244,25 @@ return true; } - // { ..., capped: true, size: ..., max: ... } - // returns true if successful - bool userCreateNS(const char *ns, BSONObj j, string& err, bool logForReplication) { + /** { ..., capped: true, size: ..., max: ... } + @param deferIdIndex - if not not, defers id index creation. sets the bool value to true if we wanted to create the id index. + @return true if successful + */ + bool userCreateNS(const char *ns, BSONObj options, string& err, bool logForReplication, bool *deferIdIndex) { const char *coll = strchr( ns, '.' 
) + 1; massert( 10356 , "invalid ns", coll && *coll ); char cl[ 256 ]; nsToDatabase( ns, cl ); - bool ok = _userCreateNS(ns, j, err); + bool ok = _userCreateNS(ns, options, err, deferIdIndex); if ( logForReplication && ok ) { - if ( j.getField( "create" ).eoo() ) { + if ( options.getField( "create" ).eoo() ) { BSONObjBuilder b; b << "create" << coll; - b.appendElements( j ); - j = b.obj(); + b.appendElements( options ); + options = b.obj(); } string logNs = string( cl ) + ".$cmd"; - logOp("c", logNs.c_str(), j); + logOp("c", logNs.c_str(), options); } return ok; } @@ -251,10 +270,19 @@ /*---------------------------------------------------------------------*/ int MongoDataFile::maxSize() { - if ( sizeof( int* ) == 4 ) + if ( sizeof( int* ) == 4 ) { return 512 * 1024 * 1024; - else + } else if ( cmdLine.smallfiles ) { + return 0x7ff00000 >> 2; + } else { return 0x7ff00000; + } + } + + void MongoDataFile::badOfs(int ofs) const { + stringstream ss; + ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; + uasserted(13440, ss.str()); } int MongoDataFile::defaultSize( const char *filename ) const { @@ -286,7 +314,7 @@ very simple temporary implementation - we will in future look up the quota from the grid database */ - if ( cmdLine.quota && fileNo > cmdLine.quotaFiles && !boost::filesystem::exists(filename) ) { + if ( cmdLine.quota && fileNo > cmdLine.quotaFiles && !MMF::exists(filename) ) { /* todo: if we were adding / changing keys in an index did we do some work previously that needs cleaning up? Possible. We should check code like that and have it catch the exception and do @@ -322,7 +350,8 @@ return; } - header = (MDFHeader *) mmf.map(filename, size); + _p = mmf.map(filename, size); + header = (DataFileHeader *) _p.at(0, DataFileHeader::HeaderSize); if( sizeof(char *) == 4 ) uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", header); else @@ -330,6 +359,10 @@ header->init(fileNo, size); } + void MongoDataFile::flush( bool sync ){ + mmf.flush( sync ); + } + void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) { DiskLoc oldExtentLoc; NamespaceIndex *ni = nsindex(ns); @@ -354,7 +387,7 @@ Extent* MongoDataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) { massert( 10357 , "shutdown in progress", !goingAway ); - massert( 10358 , "bad new extent size", approxSize >= 0 && approxSize <= MaxExtentSize ); + massert( 10358 , "bad new extent size", approxSize >= 0 && approxSize <= Extent::maxSize() ); massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header ); // null if file open failed int ExtentSize = approxSize <= header->unusedLength ? approxSize : header->unusedLength; DiskLoc loc; @@ -377,8 +410,8 @@ addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped); - DEV log() << "new extent " << ns << " size: 0x" << hex << ExtentSize << " loc: 0x" << hex << offset - << " emptyLoc:" << hex << emptyLoc.getOfs() << dec << endl; + DEV tlog(1) << "new extent " << ns << " size: 0x" << hex << ExtentSize << " loc: 0x" << hex << offset + << " emptyLoc:" << hex << emptyLoc.getOfs() << dec << endl; return e; } @@ -447,6 +480,7 @@ /*---------------------------------------------------------------------*/ DiskLoc Extent::reuse(const char *nsname) { + /*TODOMMF - work to do when extent is freed. 
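(For a concrete sense of what the method below rebuilds: everything past the extent header becomes one DeletedRecord. A worked example -- assuming Extent::HeaderSize() comes to 0xB0 bytes, an illustrative figure rather than a constant from this patch -- reusing a 0x100000 byte extent yields delRecLength = 0x100000 - 0xB0 = 0xFFF50, with that lone record starting at myLoc + 0xB0.)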
*/ log(3) << "reset extent was:" << nsDiagnostic.buf << " now:" << nsname << '\n'; massert( 10360 , "Extent::reset bad magic value", magic == 0x41424344 ); xnext.Null(); @@ -456,13 +490,14 @@ lastRecord.Null(); DiskLoc emptyLoc = myLoc; - emptyLoc.inc( (extentData-(char*)this) ); + emptyLoc.inc( (int) (_extentData-(char*)this) ); + + int delRecLength = length - (_extentData - (char *) this); + //DeletedRecord *empty1 = (DeletedRecord *) extentData; + DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength);//(DeletedRecord *) getRecord(emptyLoc); + //assert( empty == empty1 ); - int delRecLength = length - (extentData - (char *) this); - DeletedRecord *empty1 = (DeletedRecord *) extentData; - DeletedRecord *empty = (DeletedRecord *) getRecord(emptyLoc); - assert( empty == empty1 ); - memset(empty, delRecLength, 1); + // do we want to zero the record? memset(empty, ...) empty->lengthWithHeaders = delRecLength; empty->extentOfs = myLoc.getOfs(); @@ -483,19 +518,20 @@ lastRecord.Null(); DiskLoc emptyLoc = myLoc; - emptyLoc.inc( (extentData-(char*)this) ); + emptyLoc.inc( (int) (_extentData-(char*)this) ); - DeletedRecord *empty1 = (DeletedRecord *) extentData; - DeletedRecord *empty = (DeletedRecord *) getRecord(emptyLoc); - assert( empty == empty1 ); - empty->lengthWithHeaders = _length - (extentData - (char *) this); + int l = _length - (_extentData - (char *) this); + //DeletedRecord *empty1 = (DeletedRecord *) extentData; + DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, l); + //assert( empty == empty1 ); + empty->lengthWithHeaders = l; empty->extentOfs = myLoc.getOfs(); return emptyLoc; } /* Record* Extent::newRecord(int len) { - if( firstEmptyRegion.isNull() ) + if( firstEmptyRegion.isNull() )8 return 0; assert(len > 0); @@ -539,12 +575,20 @@ } */ + int Extent::maxSize() { + int maxExtentSize = 0x7ff00000; + if ( cmdLine.smallfiles ) { + maxExtentSize >>= 2; + } + return maxExtentSize; + } + /*---------------------------------------------------------------------*/ - auto_ptr DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) { + shared_ptr DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) { NamespaceDetails * d = nsdetails( ns ); if ( ! d ) - return auto_ptr(new BasicCursor(DiskLoc())); + return shared_ptr(new BasicCursor(DiskLoc())); DiskLoc loc = d->firstExtent; Extent *e = getExtent(loc); @@ -569,10 +613,10 @@ } if ( d->capped ) - return auto_ptr< Cursor >( new ForwardCappedCursor( d , startLoc ) ); + return shared_ptr( new ForwardCappedCursor( d , startLoc ) ); if ( !startLoc.isNull() ) - return auto_ptr(new BasicCursor( startLoc )); + return shared_ptr(new BasicCursor( startLoc )); while ( e->firstRecord.isNull() && !e->xnext.isNull() ) { /* todo: if extent is empty, free it for reuse elsewhere. @@ -583,13 +627,13 @@ // it might be nice to free the whole extent here! but have to clean up free recs then. e = e->getNextExtent(); } - return auto_ptr(new BasicCursor( e->firstRecord )); + return shared_ptr(new BasicCursor( e->firstRecord )); } /* get a table scan cursor, but can be forward or reverse direction. order.$natural - if set, > 0 means forward (asc), < 0 backward (desc). 
*/ - auto_ptr<Cursor> findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc) { + shared_ptr<Cursor> findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc) { BSONElement el = order.getField("$natural"); // e.g., { $natural : -1 } if ( el.number() >= 0 ) @@ -599,19 +643,19 @@ NamespaceDetails *d = nsdetails(ns); if ( !d ) - return auto_ptr<Cursor>(new BasicCursor(DiskLoc())); + return shared_ptr<Cursor>(new BasicCursor(DiskLoc())); if ( !d->capped ) { if ( !startLoc.isNull() ) - return auto_ptr<Cursor>(new ReverseCursor( startLoc )); + return shared_ptr<Cursor>(new ReverseCursor( startLoc )); Extent *e = d->lastExtent.ext(); while ( e->lastRecord.isNull() && !e->xprev.isNull() ) { OCCASIONALLY out() << " findTableScan: extent empty, skipping ahead" << endl; e = e->getPrevExtent(); } - return auto_ptr<Cursor>(new ReverseCursor( e->lastRecord )); + return shared_ptr<Cursor>(new ReverseCursor( e->lastRecord )); } else { - return auto_ptr< Cursor >( new ReverseCappedCursor( d, startLoc ) ); + return shared_ptr<Cursor>( new ReverseCappedCursor( d, startLoc ) ); } } @@ -663,7 +707,7 @@ NamespaceDetails *freeExtents = nsdetails(s.c_str()); if( freeExtents == 0 ) { string err; - _userCreateNS(s.c_str(), BSONObj(), err); + _userCreateNS(s.c_str(), BSONObj(), err, 0); freeExtents = nsdetails(s.c_str()); massert( 10361 , "can't create .$freelist", freeExtents); } @@ -690,7 +734,8 @@ void dropCollection( const string &name, string &errmsg, BSONObjBuilder &result ) { log(1) << "dropCollection: " << name << endl; NamespaceDetails *d = nsdetails(name.c_str()); - assert( d ); + if( d == 0 ) + return; BackgroundOperation::assertNoBgOpInProgForNs(name.c_str()); @@ -698,14 +743,18 @@ try { assert( dropIndexes(d, name.c_str(), "*", errmsg, result, true) ); } - catch( DBException& ) { - uasserted(12503,"drop: dropIndexes for collection failed - consider trying repair"); + catch( DBException& e ) { + stringstream ss; + ss << "drop: dropIndexes for collection failed - consider trying repair "; + ss << " cause: " << e.what(); + uasserted(12503,ss.str()); } assert( d->nIndexes == 0 ); } log(1) << "\t dropIndexes done" << endl; result.append("ns", name.c_str()); ClientCursor::invalidate(name.c_str()); + Client::invalidateNS( name ); Top::global.collectionDropped( name ); dropNS(name); } @@ -831,7 +880,7 @@ NamespaceDetails *d, NamespaceDetailsTransient *nsdt, Record *toupdate, const DiskLoc& dl, - const char *_buf, int _len, OpDebug& debug) + const char *_buf, int _len, OpDebug& debug, bool &changedId, bool god) { StringBuilder& ss = debug.str; dassert( toupdate == dl.rec() ); @@ -858,17 +907,17 @@ below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ vector<IndexChanges> changes; - getIndexChanges(changes, *d, objNew, objOld); + getIndexChanges(changes, *d, objNew, objOld, changedId); dupCheck(changes, *d, dl); if ( toupdate->netLength() < objNew.objsize() ) { // doesn't fit.
reallocate ----------------------------------------------------- - uassert( 10003 , "E10003 failing update: objects in a capped ns cannot grow", !(d && d->capped)); + uassert( 10003 , "failing update: objects in a capped ns cannot grow", !(d && d->capped)); d->paddingTooSmall(); if ( cc().database()->profile ) ss << " moved "; deleteRecord(ns, toupdate, dl); - return insert(ns, objNew.objdata(), objNew.objsize(), false); + return insert(ns, objNew.objdata(), objNew.objsize(), god); } nsdt->notifyOfWriteOp(); @@ -891,13 +940,14 @@ } assert( !dl.isNull() ); BSONObj idxKey = idx.info.obj().getObjectField("key"); + Ordering ordering = Ordering::make(idxKey); keyUpdates += changes[x].added.size(); for ( unsigned i = 0; i < changes[x].added.size(); i++ ) { try { /* we did the dupCheck() above. so we don't have to worry about it here. */ idx.head.btree()->bt_insert( idx.head, - dl, *changes[x].added[i], idxKey, /*dupsAllowed*/true, idx); + dl, *changes[x].added[i], ordering, /*dupsAllowed*/true, idx); } catch (AssertionException& e) { ss << " exception update index "; @@ -915,15 +965,15 @@ } int followupExtentSize(int len, int lastExtentLen) { - assert( len < MaxExtentSize ); + assert( len < Extent::maxSize() ); int x = initialExtentSize(len); int y = (int) (lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.2); int sz = y > x ? y : x; if ( sz < lastExtentLen ) sz = lastExtentLen; - else if ( sz > MaxExtentSize ) - sz = MaxExtentSize; + else if ( sz > Extent::maxSize() ) + sz = Extent::maxSize(); sz = ((int)sz) & 0xffffff00; assert( sz > len ); @@ -937,6 +987,7 @@ BSONObjSetDefaultOrder keys; idx.getKeysFromObject(obj, keys); BSONObj order = idx.keyPattern(); + Ordering ordering = Ordering::make(order); int n = 0; for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { if( ++n == 2 ) { @@ -945,10 +996,10 @@ assert( !recordLoc.isNull() ); try { idx.head.btree()->bt_insert(idx.head, recordLoc, - *i, order, dupsAllowed, idx); + *i, ordering, dupsAllowed, idx); } catch (AssertionException& e) { - if( e.code == 10287 && idxNo == d->nIndexes ) { + if( e.getCode() == 10287 && idxNo == d->nIndexes ) { DEV log() << "info: caught key already in index on bg indexing (ok)" << endl; continue; } @@ -980,9 +1031,9 @@ auto_ptr i = sorter.iterator(); while( i->more() ) { BSONObjExternalSorter::Data d = i->next(); - cout << d.second.toString() << endl; + /*cout << d.second.toString() << endl; cout << d.first.objsize() << endl; - cout<<"SORTER next:" << d.first.toString() << endl; + cout<<"SORTER next:" << d.first.toString() << endl;*/ } } @@ -993,10 +1044,10 @@ Timer t; - log() << "Buildindex " << ns << " idxNo:" << idxNo << ' ' << idx.info.obj().toString() << endl; + tlog(1) << "fastBuildIndex " << ns << " idxNo:" << idxNo << ' ' << idx.info.obj().toString() << endl; bool dupsAllowed = !idx.unique(); - bool dropDups = idx.dropDups(); + bool dropDups = idx.dropDups() || inDBRepair; BSONObj order = idx.keyPattern(); idx.head.Null(); @@ -1005,11 +1056,11 @@ /* get and sort all the keys ----- */ unsigned long long n = 0; - auto_ptr c = theDataFileMgr.findAll(ns); + shared_ptr c = theDataFileMgr.findAll(ns); BSONObjExternalSorter sorter(order); sorter.hintNumObjects( d->nrecords ); unsigned long long nkeys = 0; - ProgressMeter & pm = op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 ); + ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 ) ); while ( c->ok() ) { BSONObj o = c->current(); DiskLoc loc = c->currLoc(); @@ -1048,7 
+1099,7 @@ BtreeBuilder btBuilder(dupsAllowed, idx); BSONObj keyLast; auto_ptr i = sorter.iterator(); - pm = op->setMessage( "index: (2/3) btree bottom up" , nkeys , 10 ); + assert( pm == op->setMessage( "index: (2/3) btree bottom up" , nkeys , 10 ) ); while( i->more() ) { RARELY killCurrentOp.checkForInterrupt(); BSONObjExternalSorter::Data d = i->next(); @@ -1102,8 +1153,8 @@ unsigned long long n = 0; auto_ptr cc; { - auto_ptr c = theDataFileMgr.findAll(ns); - cc.reset( new ClientCursor(c, ns, false) ); + shared_ptr c = theDataFileMgr.findAll(ns); + cc.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, ns) ); } CursorId id = cc->cursorid; @@ -1155,6 +1206,7 @@ void prep(const char *ns, NamespaceDetails *d) { assertInWriteLock(); + uassert( 13130 , "can't start bg index b/c in recursive lock (db.eval?)" , dbMutex.getState() == 1 ); bgJobsInProgress.insert(d); d->backgroundIndexBuildInProgress = 1; d->nIndexes--; @@ -1196,7 +1248,7 @@ // throws DBException static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo, bool background) { - log() << "building new index on " << idx.keyPattern() << " for " << ns << ( background ? " background" : "" ) << endl; + tlog() << "building new index on " << idx.keyPattern() << " for " << ns << ( background ? " background" : "" ) << endl; Timer t; unsigned long long n; @@ -1205,7 +1257,7 @@ } assert( !BackgroundOperation::inProgForNs(ns.c_str()) ); // should have been checked earlier, better not be... - if( !background ) { + if( inDBRepair || !background ) { n = fastBuildIndex(ns.c_str(), d, idx, idxNo); assert( !idx.head.isNull() ); } @@ -1213,7 +1265,7 @@ BackgroundIndexBuildJob j(ns.c_str()); n = j.go(ns, d, idx, idxNo); } - log() << "done for " << n << " records " << t.millis() / 1000.0 << "secs" << endl; + tlog() << "done for " << n << " records " << t.millis() / 1000.0 << "secs" << endl; } /* add keys to indexes for a new record */ @@ -1289,17 +1341,21 @@ void DataFileMgr::insertAndLog( const char *ns, const BSONObj &o, bool god ) { BSONObj tmp = o; - insert( ns, tmp, god ); + insertWithObjMod( ns, tmp, god ); logOp( "i", ns, tmp ); } - DiskLoc DataFileMgr::insert(const char *ns, BSONObj &o, bool god) { + DiskLoc DataFileMgr::insertWithObjMod(const char *ns, BSONObj &o, bool god) { DiskLoc loc = insert( ns, o.objdata(), o.objsize(), god ); if ( !loc.isNull() ) o = BSONObj( loc.rec() ); return loc; } + void DataFileMgr::insertNoReturnVal(const char *ns, BSONObj o, bool god) { + insert( ns, o.objdata(), o.objsize(), god ); + } + bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection); // We are now doing two btree scans for all unique indexes (one here, and one when we've @@ -1320,13 +1376,13 @@ } } } - + /* note: if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc after the call -- that will prevent a double buffer copy in some cases (btree.cpp). */ DiskLoc DataFileMgr::insert(const char *ns, const void *obuf, int len, bool god, const BSONElement &writeId, bool mayAddIndex) { bool wouldAddIndex = false; - massert( 10093 , "cannot insert into reserved $ collection", god || strchr(ns, '$') == 0 ); + massert( 10093 , "cannot insert into reserved $ collection", god || nsDollarCheck( ns ) ); uassert( 10094 , "invalid ns", strchr( ns , '.' 
) > 0 ); const char *sys = strstr(ns, "system."); if ( sys ) { @@ -1366,6 +1422,7 @@ string tabletoidxns; if ( addIndex ) { + assert( obuf ); BSONObj io((const char *) obuf); if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) ) return DiskLoc(); @@ -1428,7 +1485,7 @@ } } if ( loc.isNull() ) { - log() << "out of space in datafile " << ns << " capped:" << d->capped << endl; + log() << "insert: couldn't alloc space for object ns:" << ns << " capped:" << d->capped << endl; assert(d->capped); return DiskLoc(); } @@ -1468,15 +1525,24 @@ NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp(); if ( tableToIndex ) { + uassert( 13143 , "can't create index on system.indexes" , tabletoidxns.find( ".system.indexes" ) == string::npos ); + BSONObj info = loc.obj(); bool background = info["background"].trueValue(); + if( background && cc().isSyncThread() ) { + /* don't do background indexing on slaves. there are nuances. this could be added later + but requires more code. + */ + log() << "info: indexing in foreground on this replica; was a background index build on the primary" << endl; + background = false; + } int idxNo = tableToIndex->nIndexes; IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes idx.info = loc; try { buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background); - } catch( DBException& ) { + } catch( DBException& e ) { // save our error msg string as an exception or dropIndexes will overwrite our message LastError *le = lastError.get(); int savecode = 0; @@ -1485,6 +1551,10 @@ savecode = le->code; saveerrmsg = le->msg; } + else { + savecode = e.getCode(); + saveerrmsg = e.what(); + } // roll back this index string name = idx.indexName(); @@ -1494,7 +1564,7 @@ if( !ok ) { log() << "failed to drop index after a unique key error building it: " << errmsg << ' ' << tabletoidxns << ' ' << name << endl; } - + assert( le && !saveerrmsg.empty() ); raiseError(savecode,saveerrmsg.c_str()); throw; @@ -1571,21 +1641,46 @@ namespace mongo { - void dropDatabase(const char *ns) { - // ns is of the form ".$cmd" - char db[256]; - nsToDatabase(ns, db); + void dropAllDatabasesExceptLocal() { + writelock lk(""); + + vector n; + getDatabaseNames(n); + if( n.size() == 0 ) return; + log() << "dropAllDatabasesExceptLocal " << n.size() << endl; + for( vector::iterator i = n.begin(); i != n.end(); i++ ) { + if( *i != "local" ) { + Client::Context ctx(*i); + dropDatabase(*i); + } + } + } + + void dropDatabase(string db) { log(1) << "dropDatabase " << db << endl; + assert( cc().database() ); assert( cc().database()->name == db ); - BackgroundOperation::assertNoBgOpInProgForDb(db); + BackgroundOperation::assertNoBgOpInProgForDb(db.c_str()); - closeDatabase( db ); - _deleteDataFiles(db); + Client::invalidateDB( db ); + + closeDatabase( db.c_str() ); + _deleteDataFiles( db.c_str() ); } typedef boost::filesystem::path Path; + void boostRenameWrapper( const Path &from, const Path &to ) { + try { + boost::filesystem::rename( from, to ); + } catch ( const boost::filesystem::filesystem_error & ) { + // boost rename doesn't work across partitions + boost::filesystem::copy_file( from, to); + boost::filesystem::remove( from ); + } + } + // back up original database files to 'temp' dir void _renameForBackup( const char *database, const Path &reservedPath ) { Path newPath( reservedPath ); @@ -1599,7 +1694,7 @@ virtual bool apply( const Path &p ) { if ( !boost::filesystem::exists( p ) ) return false; - 
boost::filesystem::rename( p, newPath_ / ( p.leaf() + ".bak" ) ); + boostRenameWrapper( p, newPath_ / ( p.leaf() + ".bak" ) ); return true; } virtual const char * op() const { @@ -1622,7 +1717,7 @@ virtual bool apply( const Path &p ) { if ( !boost::filesystem::exists( p ) ) return false; - boost::filesystem::rename( p, newPath_ / p.leaf() ); + boostRenameWrapper( p, newPath_ / p.leaf() ); return true; } virtual const char * op() const { @@ -1676,32 +1771,33 @@ #include namespace mongo { #endif - boost::intmax_t freeSpace() { + boost::intmax_t freeSpace ( const string &path ) { #if !defined(_WIN32) struct statvfs info; - assert( !statvfs( dbpath.c_str() , &info ) ); + assert( !statvfs( path.c_str() , &info ) ); return boost::intmax_t( info.f_bavail ) * info.f_frsize; #else return -1; #endif } - bool repairDatabase( const char *ns, string &errmsg, + bool repairDatabase( string dbNameS , string &errmsg, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { + doingRepair dr; + dbNameS = nsToDatabase( dbNameS ); + const char * dbName = dbNameS.c_str(); + stringstream ss; ss << "localhost:" << cmdLine.port; string localhost = ss.str(); - - // ns is of the form ".$cmd" - char dbName[256]; - nsToDatabase(ns, dbName); + problem() << "repairDatabase " << dbName << endl; assert( cc().database()->name == dbName ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); boost::intmax_t totalSize = dbSize( dbName ); - boost::intmax_t freeSize = freeSpace(); + boost::intmax_t freeSize = freeSpace( repairpath ); if ( freeSize > -1 && freeSize < totalSize ) { stringstream ss; ss << "Cannot repair database " << dbName << " having size: " << totalSize @@ -1800,6 +1896,8 @@ dbs.insert( i->first ); } + currentClient.get()->getContext()->clear(); + BSONObjBuilder bb( result.subarrayStart( "dbs" ) ); int n = 0; int nNotClosed = 0; @@ -1813,13 +1911,16 @@ } else { closeDatabase( name.c_str() , path ); - bb.append( bb.numStr( n++ ).c_str() , name ); + bb.append( bb.numStr( n++ ) , name ); } } bb.done(); if( nNotClosed ) result.append("nNotClosed", nNotClosed); - + else { + ClientCursor::assertNoCursors(); + } + return true; } diff -Nru mongodb-1.4.4/db/pdfile.h mongodb-1.6.3/db/pdfile.h --- mongodb-1.4.4/db/pdfile.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/pdfile.h 2010-09-24 10:02:42.000000000 -0700 @@ -25,7 +25,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/mmap.h" #include "diskloc.h" #include "jsobjmanipulator.h" @@ -34,29 +34,28 @@ namespace mongo { - class MDFHeader; + class DataFileHeader; class Extent; class Record; class Cursor; class OpDebug; - void dropDatabase(const char *ns); - bool repairDatabase(const char *ns, string &errmsg, bool preserveClonedFilesOnFailure = false, bool backupOriginalFiles = false); + void dropDatabase(string db); + bool repairDatabase(string db, string &errmsg, bool preserveClonedFilesOnFailure = false, bool backupOriginalFiles = false); /* low level - only drops this ns */ void dropNS(const string& dropNs); /* deletes this ns, indexes and cursors */ void dropCollection( const string &name, string &errmsg, BSONObjBuilder &result ); - bool userCreateNS(const char *ns, BSONObj j, string& err, bool logForReplication); - auto_ptr findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc=DiskLoc()); + bool userCreateNS(const char *ns, BSONObj j, string& err, bool logForReplication, bool *deferIdIndex = 0); + shared_ptr findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc=DiskLoc()); // 
-1 if library unavailable. - boost::intmax_t freeSpace(); + boost::intmax_t freeSpace( const string &path = dbpath ); /*---------------------------------------------------------------------*/ - class MDFHeader; class MongoDataFile { friend class DataFileMgr; friend class BasicCursor; @@ -70,22 +69,29 @@ */ Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0); - MDFHeader *getHeader() { + DataFileHeader *getHeader() { return header; } /* return max size an extent may be */ static int maxSize(); - + + void flush( bool sync ); + private: + void badOfs(int) const; + int defaultSize( const char *filename ) const; Extent* getExtent(DiskLoc loc); Extent* _getExtent(DiskLoc loc); Record* recordAt(DiskLoc dl); + Record* makeRecord(DiskLoc dl, int size); + void grow(DiskLoc dl, int size); - MemoryMappedFile mmf; - MDFHeader *header; + MMF mmf; + MMF::Pointer _p; + DataFileHeader *header; int fileNo; }; @@ -98,18 +104,26 @@ static Extent* allocFromFreeList(const char *ns, int approxSize, bool capped = false); /** @return DiskLoc where item ends up */ + // changedId should be initialized to false const DiskLoc updateRecord( const char *ns, NamespaceDetails *d, NamespaceDetailsTransient *nsdt, Record *toupdate, const DiskLoc& dl, - const char *buf, int len, OpDebug& debug); + const char *buf, int len, OpDebug& debug, bool &changedId, bool god=false); + // The object o may be updated if modified on insert. void insertAndLog( const char *ns, const BSONObj &o, bool god = false ); - DiskLoc insert(const char *ns, BSONObj &o, bool god = false); + + /** @param obj both and in and out param -- insert can sometimes modify an object (such as add _id). */ + DiskLoc insertWithObjMod(const char *ns, BSONObj &o, bool god = false); + + /** @param obj in value only for this version. */ + void insertNoReturnVal(const char *ns, BSONObj o, bool god = false); + DiskLoc insert(const char *ns, const void *buf, int len, bool god = false, const BSONElement &writeId = BSONElement(), bool mayAddIndex = true); void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false); - static auto_ptr findAll(const char *ns, const DiskLoc &startLoc = DiskLoc()); + static shared_ptr findAll(const char *ns, const DiskLoc &startLoc = DiskLoc()); /* special version of insert for transaction logging -- streamlined a bit. assumes ns is capped and no indexes @@ -119,6 +133,8 @@ static Extent* getExtent(const DiskLoc& dl); static Record* getRecord(const DiskLoc& dl); + static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len); + static void grow(const DiskLoc& dl, int len); /* does not clean up indexes, etc. : just deletes the record in the pdfile. */ void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl); @@ -197,7 +213,9 @@ int length; /* size of the extent, including these fields */ DiskLoc firstRecord, lastRecord; - char extentData[4]; + char _extentData[4]; + + static int HeaderSize() { return sizeof(Extent)-4; } bool validates() { return !(firstRecord.isNull() ^ lastRecord.isNull()) && @@ -239,6 +257,8 @@ Extent* getPrevExtent() { return xprev.isNull() ? 
0 : DataFileMgr::getExtent(xprev); } + + static int maxSize(); }; /* @@ -254,8 +274,7 @@ ---------------------- */ - /* data file header */ - class MDFHeader { + class DataFileHeader { public: int version; int versionMinor; @@ -266,9 +285,7 @@ char data[4]; - static int headerSize() { - return sizeof(MDFHeader) - 4; - } + enum { HeaderSize = 8192 }; bool currentVersion() const { return ( version == VERSION ) && ( versionMinor == VERSION_MINOR ); @@ -279,28 +296,28 @@ return false; } - Record* getRecord(DiskLoc dl) { + /*Record* __getRecord(DiskLoc dl) { int ofs = dl.getOfs(); - assert( ofs >= headerSize() ); + assert( ofs >= HeaderSize ); return (Record*) (((char *) this) + ofs); - } + }*/ void init(int fileno, int filelength) { if ( uninitialized() ) { assert(filelength > 32768 ); - assert( headerSize() == 8192 ); + assert( HeaderSize == 8192 ); fileLength = filelength; version = VERSION; versionMinor = VERSION_MINOR; - unused.setOfs( fileno, headerSize() ); - assert( (data-(char*)this) == headerSize() ); - unusedLength = fileLength - headerSize() - 16; - memcpy(data+unusedLength, " \nthe end\n", 16); + unused.setOfs( fileno, HeaderSize ); + assert( (data-(char*)this) == HeaderSize ); + unusedLength = fileLength - HeaderSize - 16; + //memcpy(data+unusedLength, " \nthe end\n", 16); } } bool isEmpty() const { - return uninitialized() || ( unusedLength == fileLength - headerSize() - 16 ); + return uninitialized() || ( unusedLength == fileLength - HeaderSize - 16 ); } }; @@ -308,7 +325,7 @@ inline Extent* MongoDataFile::_getExtent(DiskLoc loc) { loc.assertOk(); - Extent *e = (Extent *) (((char *)header) + loc.getOfs()); + Extent *e = (Extent *) _p.at(loc.getOfs(), Extent::HeaderSize()); return e; } @@ -325,7 +342,20 @@ namespace mongo { inline Record* MongoDataFile::recordAt(DiskLoc dl) { - return header->getRecord(dl); + int ofs = dl.getOfs(); + if( ofs < DataFileHeader::HeaderSize ) badOfs(ofs); // will uassert - external call to keep out of the normal code path + return (Record*) _p.at(ofs, -1); + } + + inline void MongoDataFile::grow(DiskLoc dl, int size) { + int ofs = dl.getOfs(); + _p.grow(ofs, size); + } + + inline Record* MongoDataFile::makeRecord(DiskLoc dl, int size) { + int ofs = dl.getOfs(); + assert( ofs >= DataFileHeader::HeaderSize ); + return (Record*) _p.at(ofs, size); } inline DiskLoc Record::getNext(const DiskLoc& myLoc) { @@ -446,9 +476,31 @@ assert( dl.a() != -1 ); return cc().database()->getFile(dl.a())->recordAt(dl); } + + BOOST_STATIC_ASSERT( 16 == sizeof(DeletedRecord) ); + + inline void DataFileMgr::grow(const DiskLoc& dl, int len) { + assert( dl.a() != -1 ); + cc().database()->getFile(dl.a())->grow(dl, len); + } + + inline DeletedRecord* DataFileMgr::makeDeletedRecord(const DiskLoc& dl, int len) { + assert( dl.a() != -1 ); + return (DeletedRecord*) cc().database()->getFile(dl.a())->makeRecord(dl, sizeof(DeletedRecord)); + } void ensureHaveIdIndex(const char *ns); bool dropIndexes( NamespaceDetails *d, const char *ns, const char *name, string &errmsg, BSONObjBuilder &anObjBuilder, bool maydeleteIdIndex ); + + + /** + * @return true if ns is ok + */ + inline bool nsDollarCheck( const char* ns ){ + if ( strchr( ns , '$' ) == 0 ) + return true; + return strcmp( ns, "local.oplog.$main" ) == 0; + } } // namespace mongo diff -Nru mongodb-1.4.4/db/query.cpp mongodb-1.6.3/db/query.cpp --- mongodb-1.4.4/db/query.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/query.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,24 +16,26 @@ * along with this program. 
If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "query.h" #include "pdfile.h" #include "jsobjmanipulator.h" -#include "../util/builder.h" +#include "../bson/util/builder.h" #include #include "introspect.h" #include "btree.h" #include "../util/lruishmap.h" #include "json.h" #include "repl.h" -#include "replset.h" +#include "replpair.h" #include "scanandorder.h" #include "security.h" #include "curop.h" #include "commands.h" #include "queryoptimizer.h" #include "lasterror.h" +#include "../s/d_logic.h" +#include "repl_block.h" namespace mongo { @@ -49,7 +51,7 @@ extern bool useHints; // Just try to identify best plan. - class DeleteOp : public QueryOp { + class DeleteOp : public MultiCursor::CursorOp { public: DeleteOp( bool justOne, int& bestCount ) : justOne_( justOne ), @@ -57,9 +59,21 @@ bestCount_( bestCount ), _nscanned() { } - virtual void init() { + virtual void _init() { c_ = qp().newCursor(); - _matcher.reset( new CoveredIndexMatcher( qp().query(), qp().indexKey() ) ); + } + virtual bool prepareToYield() { + if ( ! _cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + virtual void recoverFromYield() { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _cc.reset(); + c_.reset(); + massert( 13340, "cursor dropped during delete", false ); + } } virtual void next() { if ( !c_->ok() ) { @@ -69,7 +83,7 @@ DiskLoc rloc = c_->currLoc(); - if ( _matcher->matches(c_->currKey(), rloc ) ) { + if ( matcher()->matches(c_->currKey(), rloc ) ) { if ( !c_->getsetdup(rloc) ) ++count_; } @@ -87,17 +101,19 @@ } } virtual bool mayRecordPlan() const { return !justOne_; } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { + bestCount_ = 0; // should be safe to reset this in contexts where createChild() is called return new DeleteOp( justOne_, bestCount_ ); } - auto_ptr< Cursor > newCursor() const { return qp().newCursor(); } + virtual shared_ptr newCursor() const { return qp().newCursor(); } private: bool justOne_; int count_; int &bestCount_; long long _nscanned; - auto_ptr< Cursor > c_; - auto_ptr< CoveredIndexMatcher > _matcher; + shared_ptr c_; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; }; /* ns: namespace, e.g. . @@ -105,7 +121,7 @@ justOne: stop after 1 match god: allow access to system namespaces, and don't yield */ - long long deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop, bool god) { + long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) { if( !god ) { if ( strstr(ns, ".system.") ) { /* note a delete from system.indexes would corrupt the db @@ -126,52 +142,58 @@ uassert( 10101 , "can't remove from a capped collection" , ! 
d->capped ); long long nDeleted = 0; - QueryPlanSet s( ns, pattern, BSONObj() ); + int best = 0; - DeleteOp original( justOne, best ); - shared_ptr< DeleteOp > bestOp = s.runOp( original ); - auto_ptr< Cursor > creal = bestOp->newCursor(); + shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) ); + shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, true ) ); if( !creal->ok() ) return nDeleted; - - CoveredIndexMatcher matcher(pattern, creal->indexKeyPattern()); - - auto_ptr cc( new ClientCursor(creal, ns, false) ); + + shared_ptr< Cursor > cPtr = creal; + auto_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) ); cc->setDoingDeletes( true ); - + CursorId id = cc->cursorid; - - unsigned long long nScanned = 0; + + bool justOne = justOneOrig; + bool canYield = !god && !creal->matcher()->docMatcher().atomic(); do { - if ( ++nScanned % 128 == 0 && !god && !matcher.docMatcher().atomic() ) { - if ( ! cc->yield() ){ - cc.release(); // has already been deleted elsewhere - break; - } + if ( canYield && ! cc->yieldSometimes() ){ + cc.release(); // has already been deleted elsewhere + // TODO should we assert or something? + break; } - + if ( !cc->c->ok() ) { + break; // if we yielded, could have hit the end + } + // this way we can avoid calling updateLocation() every time (expensive) // as well as some other nuances handled cc->setDoingDeletes( true ); - + DiskLoc rloc = cc->c->currLoc(); BSONObj key = cc->c->currKey(); + + // NOTE Calling advance() may change the matcher, so it's important + // to try to match first. + bool match = creal->matcher()->matches( key , rloc ); - cc->c->advance(); - - if ( ! matcher.matches( key , rloc ) ) + if ( ! cc->c->advance() ) + justOne = true; + + if ( ! match ) continue; - + assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it! - + if ( !justOne ) { /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore - blocks. here we might call millions of times which would be bad. - */ + blocks. here we might call millions of times which would be bad. 
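Aside: the hand-rolled "every 128 records, consider yielding" counter that
1.4.4 used in this loop is replaced above by ClientCursor::yieldSometimes(),
which owns that bookkeeping. The caller-side shape of the new loop, reduced
to its essentials (a sketch -- canYield and processCurrent are illustrative
stand-ins, not functions from this file):

    while ( cc->c->ok() ) {
        if ( canYield && !cc->yieldSometimes() ) {
            cc.release();      // the ClientCursor was deleted during the yield
            break;             // so nothing here may be touched again
        }
        if ( !cc->c->ok() )
            break;             // yielding can advance the cursor off the end
        processCurrent( cc->c->currKey(), cc->c->currLoc() );
        cc->c->advance();
    }

The two post-yield checks are the crux: while the lock is released the
ClientCursor itself can be reaped, and even a surviving cursor may have been
moved past its last record.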
+ */ cc->c->noteLocation(); } - + if ( logop ) { BSONElement e; if( BSONObj( rloc.rec() ).getObjectID( e ) ) { @@ -184,18 +206,22 @@ } } + if ( rs ) + rs->goingToDelete( rloc.obj() /*cc->c->current()*/ ); + theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc); nDeleted++; - if ( justOne ) + if ( justOne ) { break; + } cc->c->checkLocation(); - + } while ( cc->c->ok() ); if ( cc.get() && ClientCursor::find( id , false ) == 0 ){ cc.release(); } - + return nDeleted; } @@ -208,14 +234,13 @@ return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions); } catch ( AssertionException& e ) { - if ( !e.msg.empty() ) - anObjBuilder.append("assertion", e.msg); + e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" ); } curop.debug().str << " assertion "; anObjBuilder.append("errmsg", "db assertion failure"); anObjBuilder.append("ok", 0.0); BSONObj x = anObjBuilder.done(); - b.append((void*) x.objdata(), x.objsize()); + b.appendBuf((void*) x.objdata(), x.objsize()); return true; } @@ -227,7 +252,9 @@ if ( ClientCursor::erase(ids[i]) ) k++; } - log( k == n ) << "killcursors: found " << k << " of " << n << '\n'; + if ( logLevel > 0 || k != n ){ + log( k == n ) << "killcursors: found " << k << " of " << n << endl; + } } BSONObj id_obj = fromjson("{\"_id\":1}"); @@ -250,8 +277,9 @@ return qr; } - QueryResult* getMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop ) { - StringBuilder& ss = curop.debug().str; + QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) { +// log() << "TEMP GETMORE " << ns << ' ' << cursorid << ' ' << pass << endl; + exhaust = false; ClientCursor::Pointer p(cursorid); ClientCursor *cc = p._c; @@ -260,30 +288,51 @@ bufSize += sizeof( QueryResult ); bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024; } + BufBuilder b( bufSize ); b.skip(sizeof(QueryResult)); - - int resultFlags = 0; //QueryResult::ResultFlag_AwaitCapable; + + int resultFlags = ResultFlag_AwaitCapable; int start = 0; int n = 0; if ( !cc ) { log() << "getMore: cursorid not found " << ns << " " << cursorid << endl; cursorid = 0; - resultFlags = QueryResult::ResultFlag_CursorNotFound; + resultFlags = ResultFlag_CursorNotFound; } else { - ss << " query: " << cc->query << " "; + if ( pass == 0 ) + cc->updateSlaveLocation( curop ); + + int queryOptions = cc->_queryOptions; + + if( pass == 0 ) { + StringBuilder& ss = curop.debug().str; + ss << " getMore: " << cc->query.toString() << " "; + } + start = cc->pos; Cursor *c = cc->c.get(); c->checkLocation(); + DiskLoc last; + while ( 1 ) { if ( !c->ok() ) { +// log() << "TEMP Tailable : " << c->tailable() << ' ' << (queryOptions & QueryOption_AwaitData) << endl; if ( c->tailable() ) { - if ( c->advance() ) { + /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however + advance() can still be retried as a reactivation attempt. when there is new data, it will + return true. that's what we are doing here. + */ + if ( c->advance() ) continue; + + if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) { + throw GetMoreWaitException(); } + break; } p.release(); @@ -293,31 +342,42 @@ cc = 0; break; } - if ( !cc->matcher->matches(c->currKey(), c->currLoc() ) ) { + // in some cases (clone collection) there won't be a matcher + if ( c->matcher() && !c->matcher()->matches(c->currKey(), c->currLoc() ) ) { } + /* + TODO + else if ( _chunkMatcher && !
_chunkMatcher->belongsToMe( c->currKey(), c->currLoc() ) ){ + cout << "TEMP skipping un-owned chunk: " << c->current() << endl; + } + */ else { - //out() << "matches " << c->currLoc().toString() << '\n'; if( c->getsetdup(c->currLoc()) ) { //out() << " but it's a dup \n"; } else { + last = c->currLoc(); BSONObj js = c->current(); - fillQueryResultFromObj(b, cc->fields.get(), js); + + // show disk loc should be part of the main query, not in an $or clause, so this should be ok + fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0)); n++; if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) || (ntoreturn==0 && b.len()>1*1024*1024) ) { c->advance(); cc->pos += n; - //cc->updateLocation(); break; } } } c->advance(); } + if ( cc ) { cc->updateLocation(); cc->mayUpgradeStorage(); + cc->storeOpForSlave( last ); + exhaust = cc->_queryOptions & QueryOption_Exhaust; } } @@ -335,26 +395,45 @@ class CountOp : public QueryOp { public: - CountOp( const BSONObj &spec ) : spec_( spec ), count_(), bc_() {} - virtual void init() { - query_ = spec_.getObjectField( "query" ); + CountOp( const string& ns , const BSONObj &spec ) : + _ns(ns), count_(), + skip_( spec["skip"].numberLong() ), + limit_( spec["limit"].numberLong() ), + bc_(){ + } + + virtual void _init() { c_ = qp().newCursor(); - _matcher.reset( new CoveredIndexMatcher( query_, c_->indexKeyPattern() ) ); - if ( qp().exactKeyMatch() && ! _matcher->needRecord() ) { + + if ( qp().exactKeyMatch() && ! matcher()->needRecord() ) { query_ = qp().simplifiedQuery( qp().indexKey() ); bc_ = dynamic_cast< BtreeCursor* >( c_.get() ); bc_->forgetEndKey(); } - - skip_ = spec_["skip"].numberLong(); - limit_ = spec_["limit"].numberLong(); } + virtual bool prepareToYield() { + if ( ! _cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , _ns.c_str() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + + virtual void recoverFromYield() { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + c_.reset(); + _cc.reset(); + massert( 13337, "cursor dropped during count", false ); + // TODO maybe we want to prevent recording the winning plan as well? + } + } + virtual void next() { if ( !c_->ok() ) { setComplete(); return; } + if ( bc_ ) { if ( firstMatch_.isEmpty() ) { firstMatch_ = bc_->currKeyNode().key; @@ -371,8 +450,9 @@ } _gotOne(); } - } else { - if ( !_matcher->matches(c_->currKey(), c_->currLoc() ) ) { + } + else { + if ( !matcher()->matches(c_->currKey(), c_->currLoc() ) ) { } else if( !c_->getsetdup(c_->currLoc()) ) { _gotOne(); @@ -380,8 +460,12 @@ } c_->advance(); } - virtual QueryOp *clone() const { - return new CountOp( spec_ ); + virtual QueryOp *_createChild() const { + CountOp *ret = new CountOp( _ns , BSONObj() ); + ret->count_ = count_; + ret->skip_ = skip_; + ret->limit_ = limit_; + return ret; } long long count() const { return count_; } virtual bool mayRecordPlan() const { return true; } @@ -394,93 +478,187 @@ } if ( limit_ > 0 && count_ >= limit_ ){ - setComplete(); + setStop(); return; } count_++; } - BSONObj spec_; + string _ns; + long long count_; long long skip_; long long limit_; - auto_ptr< Cursor > c_; + shared_ptr c_; BSONObj query_; BtreeCursor *bc_; - auto_ptr< CoveredIndexMatcher > _matcher; BSONObj firstMatch_; + + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; }; - + /* { count: "collectionname"[, query: ] } returns -1 on ns does not exist error. 
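Aside: applySkipLimit() below replaces the inline skip/limit arithmetic that
1.4.4 ran on d->nrecords (visible as removed lines just below). Judging from
that removed code, the helper plausibly reduces to the following (a
reconstruction for illustration, not a quote of the 1.6.3 source):

    long long applySkipLimit( long long num, const BSONObj &cmd ) {
        BSONElement s = cmd["skip"];
        BSONElement l = cmd["limit"];
        if ( s.isNumber() ) {
            num -= s.numberLong();   // skip is applied first...
            if ( num < 0 )
                num = 0;             // ...clamped at zero
        }
        if ( l.isNumber() ) {
            long long limit = l.numberLong();
            if ( limit < num )
                num = limit;         // then limit caps the result
        }
        return num;
    }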
*/ long long runCount( const char *ns, const BSONObj &cmd, string &err ) { + Client::Context cx(ns); NamespaceDetails *d = nsdetails( ns ); if ( !d ) { err = "ns missing"; return -1; } BSONObj query = cmd.getObjectField("query"); - + // count of all objects if ( query.isEmpty() ){ - long long num = d->nrecords; - num = num - cmd["skip"].numberLong(); - if ( num < 0 ) { - num = 0; - } - if ( cmd["limit"].isNumber() ){ - long long limit = cmd["limit"].numberLong(); - if ( limit < num ){ - num = limit; - } - } - return num; - } - QueryPlanSet qps( ns, query, BSONObj() ); - CountOp original( cmd ); - shared_ptr< CountOp > res = qps.runOp( original ); + return applySkipLimit( d->nrecords , cmd ); + } + MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true ); + CountOp original( ns , cmd ); + shared_ptr< CountOp > res = mps.runOp( original ); if ( !res->complete() ) { log() << "Count with ns: " << ns << " and query: " << query - << " failed with exception: " << res->exceptionMessage() + << " failed with exception: " << res->exception() << endl; return 0; } return res->count(); } - + + class ExplainBuilder { + public: + ExplainBuilder() : _i() {} + void ensureStartScan() { + if ( !_a.get() ) { + _a.reset( new BSONArrayBuilder() ); + } + } + void noteCursor( Cursor *c ) { + BSONObjBuilder b( _a->subobjStart() ); + b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds(); + b.done(); + } + void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, int millis, bool hint ) { + if ( _i == 1 ) { + _c.reset( new BSONArrayBuilder() ); + *_c << _b->obj(); + } + if ( _i == 0 ) { + _b.reset( new BSONObjBuilder() ); + } else { + _b.reset( new BSONObjBuilder( _c->subobjStart() ) ); + } + *_b << "cursor" << c->toString(); + _b->appendNumber( "nscanned", nscanned ); + _b->appendNumber( "nscannedObjects", nscannedObjects ); + *_b << "n" << n; + + if ( scanAndOrder ) + *_b << "scanAndOrder" << true; + + *_b << "millis" << millis; + + *_b << "indexBounds" << c->prettyIndexBounds(); + + if ( !hint ) { + *_b << "allPlans" << _a->arr(); + } + if ( _i != 0 ) { + _b->done(); + } + _a.reset( 0 ); + ++_i; + } + BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) { + if ( _i > 1 ) { + BSONObjBuilder b; + b << "clauses" << _c->arr(); + b.appendNumber( "nscanned", nscanned ); + b.appendNumber( "nscannedObjects", nscannedObjects ); + b << "n" << n; + b << "millis" << millis; + b.appendElements( suffix ); + return b.obj(); + } else { + _b->appendElements( suffix ); + return _b->obj(); + } + } + private: + auto_ptr< BSONArrayBuilder > _a; + auto_ptr< BSONObjBuilder > _b; + auto_ptr< BSONArrayBuilder > _c; + int _i; + }; + // Implements database 'query' requests using the query optimizer's QueryOp interface class UserQueryOp : public QueryOp { public: - UserQueryOp( const ParsedQuery& pq ) : - //int ntoskip, int ntoreturn, const BSONObj &order, bool wantMore, - // bool explain, FieldMatcher *filter, int queryOptions ) : + UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) : _buf( 32768 ) , // TODO be smarter here _pq( pq ) , _ntoskip( pq.getSkip() ) , - _nscanned(0), _nscannedObjects(0), + _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0), _n(0), + _oldN(0), + _chunkMatcher(shardingState.getChunkMatcher(pq.ns())), _inMemSort(false), _saveClientCursor(false), - _oplogReplay( pq.hasOption( QueryOption_OplogReplay) )
+ _wouldSaveClientCursor(false), + _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ), + _response( response ), + _eb( eb ), + _curop( curop ) {} - virtual void init() { - _buf.skip( sizeof( QueryResult ) ); + virtual void _init() { + // only need to put the QueryResult fields there if we're building the first buffer in the message. + if ( _response.empty() ) { + _buf.skip( sizeof( QueryResult ) ); + } if ( _oplogReplay ) { _findingStartCursor.reset( new FindingStartCursor( qp() ) ); } else { _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() ); } - _matcher.reset(new CoveredIndexMatcher( qp().query() , qp().indexKey())); if ( qp().scanAndOrderRequired() ) { _inMemSort = true; _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) ); } + + if ( _pq.isExplain() ) { + _eb.noteCursor( _c.get() ); + } + } + + virtual bool prepareToYield() { + if ( _findingStartCursor.get() ) { + return _findingStartCursor->prepareToYield(); + } else { + if ( ! _cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + } + + virtual void recoverFromYield() { + if ( _findingStartCursor.get() ) { + _findingStartCursor->recoverFromYield(); + } else { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + _so.reset(); + massert( 13338, "cursor dropped during query", false ); + // TODO maybe we want to prevent recording the winning plan as well? + } + } } virtual void next() { @@ -495,18 +673,23 @@ } if ( !_c->ok() ) { - finish(); + finish( false ); return; } - + bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors; if( 0 ) { cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl; } + + if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ){ + finish( true ); //? + return; + } _nscanned++; - if ( !_matcher->matches(_c->currKey(), _c->currLoc() , &_details ) ) { + if ( !matcher()->matches(_c->currKey(), _c->currLoc() , &_details ) ) { // not a match, continue onward if ( _details.loadedObject ) _nscannedObjects++; @@ -514,15 +697,18 @@ else { _nscannedObjects++; DiskLoc cl = _c->currLoc(); - if( !_c->getsetdup(cl) ) { + if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( _c->currKey(), _c->currLoc() ) ){ + // cout << "TEMP skipping un-owned chunk: " << _c->current() << endl; + } + else if( _c->getsetdup(cl) ) { + // dup + } + else { // got a match. - BSONObj js = _pq.returnKey() ? _c->currKey() : _c->current(); - assert( js.objsize() >= 0 ); //defensive for segfaults - if ( _inMemSort ) { // note: no cursors for non-indexed, ordered results. results must be fairly small. - _so->add(js); + _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 ); } else if ( _ntoskip > 0 ) { _ntoskip--; @@ -530,38 +716,48 @@ else { if ( _pq.isExplain() ) { _n++; - if ( _n >= _pq.getNumToReturn() && !_pq.wantMore() ) { + if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) { // .limit() was used, show just that much. - finish(); + finish( true ); //? 
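/* Aside: the prepareToYield()/recoverFromYield() pair above forms a two-phase
   protocol: before the read lock is released the op parks its cursor in a
   ClientCursor, and after relocking it either restores that state or, if the
   cursor was invalidated while unlocked, drops everything and masserts. The
   contract a QueryOp subclass honors, reduced to a sketch (the massert code
   is a placeholder, not a real error id):

       bool prepareToYield() {
           if ( !_cc )
               _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout, _c, ns() ) );
           return _cc->prepareToYield( _yieldData );  // false: cannot yield now
       }
       void recoverFromYield() {
           if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
               _c.reset();    // clear every cursor reference first...
               _cc.reset();
               massert( 0, "cursor dropped during query", false );  // ...then abort
           }
       }

   Resetting the smart pointers before the massert keeps the exception from
   unwinding past dangling cursor references -- the same ordering DeleteOp,
   CountOp and UserQueryOp all use. */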
return; } } else { + if ( _pq.returnKey() ){ BSONObjBuilder bb( _buf ); - bb.appendKeys( _c->indexKeyPattern() , js ); + bb.appendKeys( _c->indexKeyPattern() , _c->currKey() ); bb.done(); } else { - fillQueryResultFromObj( _buf , _pq.getFields() , js ); + BSONObj js = _c->current(); + assert( js.isValid() ); + + if ( _oplogReplay ){ + BSONElement e = js["ts"]; + if ( e.type() == Date || e.type() == Timestamp ) + _slaveReadTill = e._opTime(); + } + + fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0)); } _n++; if ( ! _c->supportGetMore() ){ - if ( _pq.enough( _n ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){ - finish(); + if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){ + finish( true ); return; } } - else if ( _pq.enoughForFirstBatch( _n , _buf.len() ) ){ + else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ){ /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */ if ( mayCreateCursor1 ) { - _c->advance(); - if ( _c->ok() ) { + _wouldSaveClientCursor = true; + if ( _c->advance() ) { // more...so save a cursor _saveClientCursor = true; } } - finish(); + finish( true ); return; } } @@ -571,12 +767,14 @@ _c->advance(); } - void finish() { + // this plan won, so set data for response broadly + void finish( bool stop ) { if ( _pq.isExplain() ) { _n = _inMemSort ? _so->size() : _n; } else if ( _inMemSort ) { - _so->fill( _buf, _pq.getFields() , _n ); + if( _so.get() ) + _so->fill( _buf, _pq.getFields() , _n ); } if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 ) @@ -586,121 +784,174 @@ if ( _c->tailable() ) _saveClientCursor = true; - setComplete(); + if ( _pq.isExplain()) { + _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), _curop.elapsedMillis(), useHints && !_pq.getHint().eoo() ); + } else { + _response.appendData( _buf.buf(), _buf.len() ); + _buf.decouple(); + } + if ( stop ) { + setStop(); + } else { + setComplete(); + } + + } + + void finishExplain( const BSONObj &suffix ) { + BSONObj obj = _eb.finishWithSuffix( nscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix); + fillQueryResultFromObj(_buf, 0, obj); + _n = 1; + _oldN = 0; + _response.appendData( _buf.buf(), _buf.len() ); + _buf.decouple(); } virtual bool mayRecordPlan() const { return _pq.getNumToReturn() != 1; } - virtual QueryOp *clone() const { - return new UserQueryOp( _pq ); + virtual QueryOp *_createChild() const { + if ( _pq.isExplain() ) { + _eb.ensureStartScan(); + } + UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop ); + ret->_oldN = n(); + ret->_oldNscanned = nscanned(); + ret->_oldNscannedObjects = nscannedObjects(); + ret->_ntoskip = _ntoskip; + return ret; } - BufBuilder &builder() { return _buf; } bool scanAndOrderRequired() const { return _inMemSort; } - auto_ptr< Cursor > cursor() { return _c; } - auto_ptr< CoveredIndexMatcher > matcher() { return _matcher; } - int n() const { return _n; } - long long nscanned() const { return _nscanned; } - long long nscannedObjects() const { return _nscannedObjects; } + shared_ptr cursor() { return _c; } + int n() const { return _oldN + _n; } + long long nscanned() const { return _nscanned + _oldNscanned; } + long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; } bool saveClientCursor() const { return _saveClientCursor; } + bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; } + + void finishForOplogReplay( ClientCursor * cc ){ + if ( _oplogReplay && ! 
_slaveReadTill.isNull() ) + cc->_slaveReadTill = _slaveReadTill; + } private: BufBuilder _buf; const ParsedQuery& _pq; long long _ntoskip; long long _nscanned; + long long _oldNscanned; long long _nscannedObjects; + long long _oldNscannedObjects; int _n; // found so far + int _oldN; MatchDetails _details; + ChunkMatcherPtr _chunkMatcher; + bool _inMemSort; auto_ptr< ScanAndOrder > _so; - auto_ptr< Cursor > _c; - - auto_ptr< CoveredIndexMatcher > _matcher; + shared_ptr _c; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; bool _saveClientCursor; + bool _wouldSaveClientCursor; bool _oplogReplay; auto_ptr< FindingStartCursor > _findingStartCursor; + + Message &_response; + ExplainBuilder &_eb; + CurOp &_curop; + OpTime _slaveReadTill; }; /* run a query -- includes checking for and running a Command */ - auto_ptr< QueryResult > runQuery(Message& m, QueryMessage& q, CurOp& curop ) { + const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) { StringBuilder& ss = curop.debug().str; - ParsedQuery pq( q ); - const char *ns = q.ns; + shared_ptr pq_shared( new ParsedQuery(q) ); + ParsedQuery& pq( *pq_shared ); int ntoskip = q.ntoskip; BSONObj jsobj = q.query; int queryOptions = q.queryOptions; - BSONObj snapshotHint; + const char *ns = q.ns; if( logLevel >= 2 ) - log() << "runQuery: " << ns << jsobj << endl; + log() << "query: " << ns << jsobj << endl; - long long nscanned = 0; - ss << ns << " ntoreturn:" << pq.getNumToReturn(); + ss << ns; + { + // only say ntoreturn if nonzero. + int n = pq.getNumToReturn(); + if( n ) + ss << " ntoreturn:" << n; + } curop.setQuery(jsobj); - BSONObjBuilder cmdResBuf; - long long cursorid = 0; - - auto_ptr< QueryResult > qr; - int n = 0; - - Client& c = cc(); - - if ( pq.couldBeCommand() ){ + if ( pq.couldBeCommand() ) { BufBuilder bb; bb.skip(sizeof(QueryResult)); - + BSONObjBuilder cmdResBuf; if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) { - ss << " command "; + ss << " command: " << jsobj.toString(); curop.markCommand(); - n = 1; + auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); ss << " reslen:" << bb.len(); - // qr->channel = 0; qr->setOperation(opReply); - qr->cursorId = cursorid; + qr->cursorId = 0; qr->startingFrom = 0; - qr->nReturned = n; + qr->nReturned = 1; + result.setData( qr.release(), true ); } - return qr; + return false; } - // regular query - - mongolock lk(false); // read lock - Client::Context ctx( ns , dbpath , &lk ); - - /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair - so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to - query the nonmaster member of a replica pair. - */ - uassert( 10107 , "not master" , isMaster() || pq.hasOption( QueryOption_SlaveOk ) || replSettings.slave == SimpleSlave ); + /* --- regular query --- */ + int n = 0; BSONElement hint = useHints ? pq.getHint() : BSONElement(); bool explain = pq.isExplain(); bool snapshot = pq.isSnapshot(); - BSONObj query = pq.getFilter(); BSONObj order = pq.getOrder(); + BSONObj query = pq.getFilter(); + + /* The ElemIter will not be happy if this isn't really an object. So throw exception + here when that is true. + (Which may indicate bad data from client.) 
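Aside: the objsize() == 0 guard below works because a BSON document is
self-describing -- its first four bytes are its own total length,
little-endian. Even the empty document {} occupies five bytes, so a zero
size can only mean the buffer never held a BSON document at all. For
illustration:

    // {} encoded as BSON: int32 length = 5, then a single 0x00 terminator
    const unsigned char emptyDoc[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 };
    // BSONObj::objsize() just reads that leading int32 in place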
+ */ + if ( query.objsize() == 0 ) { + out() << "Bad query object?\n jsobj:"; + out() << jsobj.toString() << "\n query:"; + out() << query.toString() << endl; + uassert( 10110 , "bad query object", false); + } + + /* --- read lock --- */ + + mongolock lk(false); + + Client::Context ctx( ns , dbpath , &lk ); + + replVerifyReadsOk(pq); if ( pq.hasOption( QueryOption_CursorTailable ) ) { NamespaceDetails *d = nsdetails( ns ); uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped ); + const BSONObj nat1 = BSON( "$natural" << 1 ); if ( order.isEmpty() ) { - order = BSON( "$natural" << 1 ); + order = nat1; } else { - uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == BSON( "$natural" << 1 ) ); + uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 ); } } + BSONObj snapshotHint; // put here to keep the data in scope if( snapshot ) { NamespaceDetails *d = nsdetails(ns); if ( d ){ @@ -722,25 +973,12 @@ } } - /* The ElemIter will not be happy if this isn't really an object. So throw exception - here when that is true. - (Which may indicate bad data from client.) - */ - if ( query.objsize() == 0 ) { - out() << "Bad query object?\n jsobj:"; - out() << jsobj.toString() << "\n query:"; - out() << query.toString() << endl; - uassert( 10110 , "bad query object", false); - } - - - if ( ! explain && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { - nscanned = 1; - + if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { bool nsFound = false; bool indexFound = false; BSONObj resObject; + Client& c = cc(); bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound ); if ( nsFound == false || indexFound == true ){ BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); @@ -751,16 +989,18 @@ n = 1; fillQueryResultFromObj( bb , pq.getFields() , resObject ); } + auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); ss << " reslen:" << bb.len(); qr->setOperation(opReply); - qr->cursorId = cursorid; + qr->cursorId = 0; qr->startingFrom = 0; - qr->nReturned = n; - return qr; + qr->nReturned = n; + result.setData( qr.release(), true ); + return false; } } @@ -768,67 +1008,72 @@ BSONObj oldPlan; if ( explain && ! pq.hasIndexSpecifier() ){ - QueryPlanSet qps( ns, query, order ); - if ( qps.usingPrerecordedPlan() ) - oldPlan = qps.explain(); - } - QueryPlanSet qps( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax() ); - UserQueryOp original( pq ); - shared_ptr< UserQueryOp > o = qps.runOp( original ); + MultiPlanScanner mps( ns, query, order ); + if ( mps.usingPrerecordedPlan() ) + oldPlan = mps.oldExplain(); + } + auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) ); + BSONObj explainSuffix; + if ( explain ) { + BSONObjBuilder bb; + if ( !oldPlan.isEmpty() ) + bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() ); + explainSuffix = bb.obj(); + } + ExplainBuilder eb; + UserQueryOp original( pq, result, eb, curop ); + shared_ptr< UserQueryOp > o = mps->runOp( original ); UserQueryOp &dqo = *o; - massert( 10362 , dqo.exceptionMessage(), dqo.complete() ); + if ( ! 
dqo.complete() ) + throw MsgAssertionException( dqo.exception() ); + if ( explain ) { + dqo.finishExplain( explainSuffix ); + } n = dqo.n(); - nscanned = dqo.nscanned(); + long long nscanned = dqo.nscanned(); if ( dqo.scanAndOrderRequired() ) ss << " scanAndOrder "; - auto_ptr cursor = dqo.cursor(); - log( 5 ) << " used cursor: " << cursor.get() << endl; - if ( dqo.saveClientCursor() ) { - // the clientcursor now owns the Cursor* and 'c' is released: - ClientCursor *cc = new ClientCursor(cursor, ns, !(queryOptions & QueryOption_NoCursorTimeout)); + shared_ptr cursor = dqo.cursor(); + if( logLevel >= 5 ) + log() << " used cursor: " << cursor.get() << endl; + long long cursorid = 0; + const char * exhaust = 0; + if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) { + ClientCursor *cc; + bool moreClauses = mps->mayRunMore(); + if ( moreClauses ) { + // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield + shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher(), dqo ) ); + cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned()); + } else { + cursor->setMatcher( dqo.matcher() ); + cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() ); + } cursorid = cc->cursorid; - cc->query = jsobj.getOwned(); - DEV out() << " query has more, cursorid: " << cursorid << endl; - cc->matcher = dqo.matcher(); + DEV tlog(2) << "query has more, cursorid: " << cursorid << endl; cc->pos = n; + cc->pq = pq_shared; cc->fields = pq.getFieldPtr(); cc->originalMessage = m; cc->updateLocation(); - if ( !cc->c->ok() && cc->c->tailable() ) { - DEV out() << " query has no more but tailable, cursorid: " << cursorid << endl; - } else { - DEV out() << " query has more, cursorid: " << cursorid << endl; + if ( !cc->c->ok() && cc->c->tailable() ) + DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl; + if( queryOptions & QueryOption_Exhaust ) { + exhaust = ns; + ss << " exhaust "; } + dqo.finishForOplogReplay(cc); } - if ( explain ) { - BSONObjBuilder builder; - builder.append("cursor", cursor->toString()); - builder.appendArray("indexBounds", cursor->prettyIndexBounds()); - builder.appendNumber("nscanned", dqo.nscanned() ); - builder.appendNumber("nscannedObjects", dqo.nscannedObjects() ); - builder.append("n", n); - if ( dqo.scanAndOrderRequired() ) - builder.append("scanAndOrder", true); - builder.append("millis", curop.elapsedMillis()); - if ( !oldPlan.isEmpty() ) - builder.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() ); - if ( hint.eoo() ) - builder.appendElements(qps.explain()); - BSONObj obj = builder.done(); - fillQueryResultFromObj(dqo.builder(), 0, obj); - n = 1; - } - qr.reset( (QueryResult *) dqo.builder().buf() ); - dqo.builder().decouple(); + + QueryResult *qr = (QueryResult *) result.header(); qr->cursorId = cursorid; qr->setResultFlagsToOk(); - qr->len = dqo.builder().len(); + // qr->len is updated automatically by appendData() ss << " reslen:" << qr->len; qr->setOperation(opReply); qr->startingFrom = 0; qr->nReturned = n; - int duration = curop.elapsedMillis(); bool dbprofile = curop.shouldDBProfile( duration ); if ( dbprofile || duration >= cmdLine.slowMS ) { @@ -837,10 +1082,10 @@ ss << " ntoskip:" << ntoskip; if ( dbprofile ) ss << " \nquery: "; - ss << jsobj << ' '; + ss << jsobj.toString() << ' '; } ss << " nreturned:" << n; - return qr; + return exhaust; } } // namespace mongo diff -Nru mongodb-1.4.4/db/query.h mongodb-1.6.3/db/query.h 
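Aside on the runQuery signature change above: it no longer hands back an
auto_ptr<QueryResult>; it writes the reply into the Message &result
out-parameter and returns either 0 or, when QueryOption_Exhaust was set and a
cursor survived, the namespace the caller should keep streaming from. A
schematic of the caller's side -- reply(), replyWith() and the cursorid
plumbing are illustrative stand-ins, not the actual dispatch code:

    Message result;
    const char *exhaustNS = runQuery( m, q, curop, result );  // reply is in 'result'
    reply( result );
    while ( exhaustNS ) {                  // exhaust mode: push batches unprompted
        bool exhaust = false;
        QueryResult *qr = processGetMore( exhaustNS, q.ntoreturn, cursorid,
                                          curop, 0, exhaust );
        replyWith( qr );
        if ( !exhaust )
            exhaustNS = 0;                 // cursor finished; stop streaming
    }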
--- mongodb-1.4.4/db/query.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/query.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/message.h" #include "dbmessage.h" #include "jsobj.h" @@ -74,44 +74,44 @@ extern const int MaxBytesToReturnToClientAtOnce; // for an existing query (ie a ClientCursor), send back additional information. - QueryResult* getMore(const char *ns, int ntoreturn, long long cursorid , CurOp& op); + struct GetMoreWaitException { }; + QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& op, int pass, bool& exhaust); + struct UpdateResult { - bool existing; - bool mod; - long long num; - - UpdateResult( bool e, bool m, unsigned long long n ) - : existing(e) , mod(m), num(n ){} - - int oldCode(){ - if ( ! num ) - return 0; - - if ( existing ){ - if ( mod ) - return 2; - return 1; + bool existing; // if existing objects were modified + bool mod; // was this a $ mod + long long num; // how many objects touched + OID upserted; // if something was upserted, the new _id of the object + + UpdateResult( bool e, bool m, unsigned long long n , const BSONObj& upsertedObject = BSONObj() ) + : existing(e) , mod(m), num(n){ + upserted.clear(); + + BSONElement id = upsertedObject["_id"]; + if ( ! e && n == 1 && id.type() == jstOID ){ + upserted = id.OID(); } - - if ( mod ) - return 3; - return 4; } + }; + + class RemoveSaver; /* returns true if an existing object was updated, false if no existing object was found. multi - update multiple objects - mostly useful with things like $set - god - allow access to system namespaces and don't yield + god - allow access to system namespaces */ UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj pattern, bool upsert, bool multi , bool logop , OpDebug& debug ); + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj pattern, + bool upsert, bool multi , bool logop , OpDebug& debug , RemoveSaver * rs = 0 ); // If justOne is true, deletedId is set to the id of the deleted object. - long long deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop = false, bool god=false); + long long deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop = false, bool god=false, RemoveSaver * rs=0); long long runCount(const char *ns, const BSONObj& cmd, string& err); - - auto_ptr< QueryResult > runQuery(Message& m, QueryMessage& q, CurOp& curop ); + + const char * runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result); /* This is for languages whose "objects" are not well ordered (JSON is well ordered). [ { a : ... } , { b : ... } ] -> { a : ..., b : ... 
} @@ -157,6 +157,7 @@ ~ParsedQuery(){} const char * ns() const { return _ns; } + bool isLocalDB() const { return strncmp(_ns, "local.", 6) == 0; } const BSONObj& getFilter() const { return _filter; } FieldMatcher* getFields() const { return _fields.get(); } @@ -172,12 +173,14 @@ bool isExplain() const { return _explain; } bool isSnapshot() const { return _snapshot; } bool returnKey() const { return _returnKey; } + bool showDiskLoc() const { return _showDiskLoc; } const BSONObj& getMin() const { return _min; } const BSONObj& getMax() const { return _max; } const BSONObj& getOrder() const { return _order; } const BSONElement& getHint() const { return _hint; } - + int getMaxScan() const { return _maxScan; } + bool couldBeCommand() const { /* we assume you are using findOne() for running a cmd... */ return _ntoreturn == 1 && strstr( _ns , ".$cmd" ); @@ -239,6 +242,8 @@ _explain = false; _snapshot = false; _returnKey = false; + _showDiskLoc = false; + _maxScan = 0; } void _initTop( const BSONObj& top ){ @@ -268,6 +273,11 @@ _hint = e; else if ( strcmp( "$returnKey" , name ) == 0 ) _returnKey = e.trueValue(); + else if ( strcmp( "$maxScan" , name ) == 0 ) + _maxScan = e.numberInt(); + else if ( strcmp( "$showDiskLoc" , name ) == 0 ) + _showDiskLoc = e.trueValue(); + } @@ -302,12 +312,14 @@ bool _explain; bool _snapshot; bool _returnKey; + bool _showDiskLoc; BSONObj _min; BSONObj _max; BSONElement _hint; BSONObj _order; + int _maxScan; }; - + } // namespace mongo diff -Nru mongodb-1.4.4/db/queryoptimizer.cpp mongodb-1.6.3/db/queryoptimizer.cpp --- mongodb-1.4.4/db/queryoptimizer.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/queryoptimizer.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,13 +16,14 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "db.h" #include "btree.h" #include "pdfile.h" #include "queryoptimizer.h" #include "cmdline.h" +#include "clientcursor.h" //#define DEBUGQO(x) cout << x << endl; #define DEBUGQO(x) @@ -51,9 +52,10 @@ QueryPlan::QueryPlan( NamespaceDetails *_d, int _idxNo, - const FieldRangeSet &fbs, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : + const FieldRangeSet &fbs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : d(_d), idxNo(_idxNo), fbs_( fbs ), + _originalQuery( originalQuery ), order_( order ), index_( 0 ), optimal_( false ), @@ -63,7 +65,8 @@ endKeyInclusive_( endKey.isEmpty() ), unhelpful_( false ), _special( special ), - _type(0){ + _type(0), + _startOrEndSpec( !startKey.isEmpty() || !endKey.isEmpty() ){ if ( !fbs_.matchPossible() ) { unhelpful_ = true; @@ -84,7 +87,8 @@ optimal_ = true; _type = index_->getSpec().getType(); massert( 13040 , (string)"no type for special: " + _special , _type ); - scanAndOrderRequired_ = _type->scanAndOrderRequired( fbs.query() , order ); + // hopefully safe to use original query in these contexts - don't think we can mix special with $or clause separation yet + scanAndOrderRequired_ = _type->scanAndOrderRequired( _originalQuery , order ); return; } @@ -153,38 +157,39 @@ if ( exactIndexedQueryCount == fbs.nNontrivialRanges() && orderFieldsUnindexed.size() == 0 && exactIndexedQueryCount == index_->keyPattern().nFields() && - exactIndexedQueryCount == fbs.query().nFields() ) { + exactIndexedQueryCount == _originalQuery.nFields() ) { exactKeyMatch_ = true; } - indexBounds_ = fbs.indexBounds( idxKey, direction_ ); - if ( !startKey.isEmpty() || !endKey.isEmpty() 
) { + _frv.reset( new FieldRangeVector( fbs, idxKey, direction_ ) ); + if ( _startOrEndSpec ) { BSONObj newStart, newEnd; if ( !startKey.isEmpty() ) - newStart = startKey; + _startKey = startKey; else - newStart = indexBounds_[ 0 ].first; + _startKey = _frv->startKey(); if ( !endKey.isEmpty() ) - newEnd = endKey; + _endKey = endKey; else - newEnd = indexBounds_[ indexBounds_.size() - 1 ].second; - BoundList newBounds; - newBounds.push_back( make_pair( newStart, newEnd ) ); - indexBounds_ = newBounds; + _endKey = _frv->endKey(); } + if ( ( scanAndOrderRequired_ || order_.isEmpty() ) && - !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) + !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) { unhelpful_ = true; + } } - auto_ptr< Cursor > QueryPlan::newCursor( const DiskLoc &startLoc , int numWanted ) const { + shared_ptr QueryPlan::newCursor( const DiskLoc &startLoc , int numWanted ) const { - if ( _type ) - return _type->newCursor( fbs_.query() , order_ , numWanted ); + if ( _type ) { + // hopefully safe to use original query in these contexts - don't think we can mix type with $or clause separation yet + return _type->newCursor( _originalQuery , order_ , numWanted ); + } if ( !fbs_.matchPossible() ){ if ( fbs_.nNontrivialRanges() ) checkTableScanAllowed( fbs_.ns() ); - return auto_ptr< Cursor >( new BasicCursor( DiskLoc() ) ); + return shared_ptr( new BasicCursor( DiskLoc() ) ); } if ( !index_ ){ if ( fbs_.nNontrivialRanges() ) @@ -194,17 +199,19 @@ massert( 10363 , "newCursor() with start location not implemented for indexed plans", startLoc.isNull() ); - if ( indexBounds_.size() < 2 ) { + if ( _startOrEndSpec ) { // we are sure to spec endKeyInclusive_ - return auto_ptr< Cursor >( new BtreeCursor( d, idxNo, *index_, indexBounds_[ 0 ].first, indexBounds_[ 0 ].second, endKeyInclusive_, direction_ >= 0 ? 1 : -1 ) ); + return shared_ptr( new BtreeCursor( d, idxNo, *index_, _startKey, _endKey, endKeyInclusive_, direction_ >= 0 ? 1 : -1 ) ); + } else if ( index_->getSpec().getType() ) { + return shared_ptr( new BtreeCursor( d, idxNo, *index_, _frv->startKey(), _frv->endKey(), true, direction_ >= 0 ? 1 : -1 ) ); } else { - return auto_ptr< Cursor >( new BtreeCursor( d, idxNo, *index_, indexBounds_, direction_ >= 0 ? 1 : -1 ) ); + return shared_ptr( new BtreeCursor( d, idxNo, *index_, _frv, direction_ >= 0 ? 
1 : -1 ) ); } } - auto_ptr< Cursor > QueryPlan::newReverseCursor() const { + shared_ptr QueryPlan::newReverseCursor() const { if ( !fbs_.matchPossible() ) - return auto_ptr< Cursor >( new BasicCursor( DiskLoc() ) ); + return shared_ptr( new BasicCursor( DiskLoc() ) ); if ( !index_ ) { int orderSpec = order_.getIntField( "$natural" ); if ( orderSpec == INT_MIN ) @@ -212,7 +219,7 @@ return findTableScan( fbs_.ns(), BSON( "$natural" << -orderSpec ) ); } massert( 10364 , "newReverseCursor() not implemented for indexed plans", false ); - return auto_ptr< Cursor >( 0 ); + return shared_ptr(); } BSONObj QueryPlan::indexKey() const { @@ -228,10 +235,10 @@ } } - QueryPlanSet::QueryPlanSet( const char *_ns, const BSONObj &query, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max ) : + QueryPlanSet::QueryPlanSet( const char *_ns, auto_ptr< FieldRangeSet > frs, const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max, bool bestGuessOnly, bool mayYield ) : ns(_ns), - query_( query.getOwned() ), - fbs_( _ns, query ), + _originalQuery( originalQuery ), + fbs_( frs ), mayRecordPlan_( true ), usingPrerecordedPlan_( false ), hint_( BSONObj() ), @@ -239,7 +246,10 @@ oldNScanned_( 0 ), honorRecordedPlan_( honorRecordedPlan ), min_( min.getOwned() ), - max_( max.getOwned() ) { + max_( max.getOwned() ), + _bestGuessOnly( bestGuessOnly ), + _mayYield( mayYield ), + _yieldSometimesTracker( 256, 20 ){ if ( hint && !hint->eoo() ) { hint_ = hint->wrap(); } @@ -251,59 +261,70 @@ string errmsg; BSONObj keyPattern = id.keyPattern(); // This reformats min_ and max_ to be used for index lookup. - massert( 10365 , errmsg, indexDetailsForRange( fbs_.ns(), errmsg, min_, max_, keyPattern ) ); + massert( 10365 , errmsg, indexDetailsForRange( fbs_->ns(), errmsg, min_, max_, keyPattern ) ); } NamespaceDetails *d = nsdetails(ns); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(id), fbs_, order_, min_, max_ ) ) ); + plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(id), *fbs_, _originalQuery, order_, min_, max_ ) ) ); + } + + // returns an IndexDetails * for a hint, 0 if hint is $natural. 
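// Aside: parseHint() below accepts the two client-visible hint forms -- a
// string naming an index, or a key-pattern object -- while { $natural : 1 }
// maps to the null return (forced table scan). Illustrative call sites
// (the index names and keys are made up):
//
//     BSONObj byName = BSON( "$hint" << "a_1" );
//     BSONObj byKey  = BSON( "$hint" << BSON( "a" << 1 ) );
//     IndexDetails *id = parseHint( byName.firstElement(), d );  // 0 => $natural
//
// Anything that matches no index falls through to uassert 10113, "bad hint".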
+ // hint must not be eoo() + IndexDetails *parseHint( const BSONElement &hint, NamespaceDetails *d ) { + massert( 13292, "hint eoo", !hint.eoo() ); + if( hint.type() == String ) { + string hintstr = hint.valuestr(); + NamespaceDetails::IndexIterator i = d->ii(); + while( i.more() ) { + IndexDetails& ii = i.next(); + if ( ii.indexName() == hintstr ) { + return &ii; + } + } + } + else if( hint.type() == Object ) { + BSONObj hintobj = hint.embeddedObject(); + uassert( 10112 , "bad hint", !hintobj.isEmpty() ); + if ( !strcmp( hintobj.firstElement().fieldName(), "$natural" ) ) { + return 0; + } + NamespaceDetails::IndexIterator i = d->ii(); + while( i.more() ) { + IndexDetails& ii = i.next(); + if( ii.keyPattern().woCompare(hintobj) == 0 ) { + return &ii; + } + } + } + uassert( 10113 , "bad hint", false ); + return 0; } void QueryPlanSet::init() { - DEBUGQO( "QueryPlanSet::init " << ns << "\t" << query_ ); + DEBUGQO( "QueryPlanSet::init " << ns << "\t" << _originalQuery ); plans_.clear(); mayRecordPlan_ = true; usingPrerecordedPlan_ = false; - const char *ns = fbs_.ns(); + const char *ns = fbs_->ns(); NamespaceDetails *d = nsdetails( ns ); - if ( !d || !fbs_.matchPossible() ) { + if ( !d || !fbs_->matchPossible() ) { // Table scan plan, when no matches are possible - plans_.push_back( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ) ); + plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); return; } BSONElement hint = hint_.firstElement(); if ( !hint.eoo() ) { mayRecordPlan_ = false; - if( hint.type() == String ) { - string hintstr = hint.valuestr(); - NamespaceDetails::IndexIterator i = d->ii(); - while( i.more() ) { - IndexDetails& ii = i.next(); - if ( ii.indexName() == hintstr ) { - addHint( ii ); - return; - } - } - } - else if( hint.type() == Object ) { - BSONObj hintobj = hint.embeddedObject(); - uassert( 10112 , "bad hint", !hintobj.isEmpty() ); - if ( !strcmp( hintobj.firstElement().fieldName(), "$natural" ) ) { - massert( 10366 , "natural order cannot be specified with $min/$max", min_.isEmpty() && max_.isEmpty() ); - // Table scan plan - plans_.push_back( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ) ); - return; - } - NamespaceDetails::IndexIterator i = d->ii(); - while( i.more() ) { - IndexDetails& ii = i.next(); - if( ii.keyPattern().woCompare(hintobj) == 0 ) { - addHint( ii ); - return; - } - } + IndexDetails *id = parseHint( hint, d ); + if ( id ) { + addHint( *id ); + } else { + massert( 10366 , "natural order cannot be specified with $min/$max", min_.isEmpty() && max_.isEmpty() ); + // Table scan plan + plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); } - uassert( 10113 , "bad hint", false ); + return; } if ( !min_.isEmpty() || !max_.isEmpty() ) { @@ -311,56 +332,54 @@ string errmsg; BSONObj keyPattern; IndexDetails *idx = indexDetailsForRange( ns, errmsg, min_, max_, keyPattern ); massert( 10367 , errmsg, idx ); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(*idx), fbs_, order_, min_, max_ ) ) ); + plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(*idx), *fbs_, _originalQuery, order_, min_, max_ ) ) ); return; } - if ( isSimpleIdQuery( query_ ) ){ + if ( isSimpleIdQuery( _originalQuery ) ){ int idx = d->findIdIndex(); if ( idx >= 0 ){ usingPrerecordedPlan_ = true; mayRecordPlan_ = false; - plans_.push_back( PlanPtr( new QueryPlan( d , idx , fbs_ , order_ ) ) ); + plans_.push_back( PlanPtr( new QueryPlan( d , idx , *fbs_ , _originalQuery, order_ ) ) ); return; } } - if ( query_.isEmpty() && order_.isEmpty() ){ -
plans_.push_back( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ) ); + if ( _originalQuery.isEmpty() && order_.isEmpty() ){ + plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); return; } - DEBUGQO( "\t special : " << fbs_.getSpecial() ); - if ( fbs_.getSpecial().size() ){ - _special = fbs_.getSpecial(); + DEBUGQO( "\t special : " << fbs_->getSpecial() ); + if ( fbs_->getSpecial().size() ){ + _special = fbs_->getSpecial(); NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { int j = i.pos(); IndexDetails& ii = i.next(); const IndexSpec& spec = ii.getSpec(); - if ( spec.getTypeName() == _special && spec.suitability( query_ , order_ ) ){ + if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , order_ ) ){ usingPrerecordedPlan_ = true; - mayRecordPlan_ = true; - plans_.push_back( PlanPtr( new QueryPlan( d , j , fbs_ , order_ , + mayRecordPlan_ = false; + plans_.push_back( PlanPtr( new QueryPlan( d , j , *fbs_ , _originalQuery, order_ , BSONObj() , BSONObj() , _special ) ) ); return; } } - uassert( 13038 , (string)"can't find special index: " + _special + " for: " + query_.toString() , 0 ); + uassert( 13038 , (string)"can't find special index: " + _special + " for: " + _originalQuery.toString() , 0 ); } if ( honorRecordedPlan_ ) { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); NamespaceDetailsTransient& nsd = NamespaceDetailsTransient::get_inlock( ns ); - BSONObj bestIndex = nsd.indexForPattern( fbs_.pattern( order_ ) ); + BSONObj bestIndex = nsd.indexForPattern( fbs_->pattern( order_ ) ); if ( !bestIndex.isEmpty() ) { - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - oldNScanned_ = nsd.nScannedForPattern( fbs_.pattern( order_ ) ); + PlanPtr p; + oldNScanned_ = nsd.nScannedForPattern( fbs_->pattern( order_ ) ); if ( !strcmp( bestIndex.firstElement().fieldName(), "$natural" ) ) { // Table scan plan - plans_.push_back( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ) ); - return; + p.reset( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ); } NamespaceDetails::IndexIterator i = d->ii(); @@ -368,11 +387,17 @@ int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { - plans_.push_back( PlanPtr( new QueryPlan( d, j, fbs_, order_ ) ) ); - return; + p.reset( new QueryPlan( d, j, *fbs_, _originalQuery, order_ ) ); } } - massert( 10368 , "Unable to locate previously recorded index", false ); + + massert( 10368 , "Unable to locate previously recorded index", p.get() ); + if ( !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { + usingPrerecordedPlan_ = true; + mayRecordPlan_ = false; + plans_.push_back( p ); + return; + } } } @@ -380,16 +405,16 @@ } void QueryPlanSet::addOtherPlans( bool checkFirst ) { - const char *ns = fbs_.ns(); + const char *ns = fbs_->ns(); NamespaceDetails *d = nsdetails( ns ); if ( !d ) return; // If table scan is optimal or natural order requested or tailable cursor requested - if ( !fbs_.matchPossible() || ( fbs_.nNontrivialRanges() == 0 && order_.isEmpty() ) || + if ( !fbs_->matchPossible() || ( fbs_->nNontrivialRanges() == 0 && order_.isEmpty() ) || ( !order_.isEmpty() && !strcmp( order_.firstElement().fieldName(), "$natural" ) ) ) { // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ), checkFirst ); + addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); return; } @@ -401,12 +426,12 @@ const IndexSpec& spec = id.getSpec(); IndexSuitability suitability = HELPFUL; if ( normalQuery ){ - 
suitability = spec.suitability( query_ , order_ ); + suitability = spec.suitability( fbs_->simplifiedQuery() , order_ ); if ( suitability == USELESS ) continue; } - PlanPtr p( new QueryPlan( d, i, fbs_, order_ ) ); + PlanPtr p( new QueryPlan( d, i, *fbs_, _originalQuery, order_ ) ); if ( p->optimal() ) { addPlan( p, checkFirst ); return; @@ -418,7 +443,7 @@ addPlan( *i, checkFirst ); // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, fbs_, order_ ) ), checkFirst ); + addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); } shared_ptr< QueryOp > QueryPlanSet::runOp( QueryOp &op ) { @@ -426,11 +451,11 @@ Runner r( *this, op ); shared_ptr< QueryOp > res = r.run(); // plans_.size() > 1 if addOtherPlans was called in Runner::run(). - if ( res->complete() || plans_.size() > 1 ) + if ( _bestGuessOnly || res->complete() || plans_.size() > 1 ) return res; { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( fbs_.ns() ).registerIndexForPattern( fbs_.pattern( order_ ), BSONObj(), 0 ); + NamespaceDetailsTransient::get_inlock( fbs_->ns() ).registerIndexForPattern( fbs_->pattern( order_ ), BSONObj(), 0 ); } init(); } @@ -441,33 +466,80 @@ BSONObj QueryPlanSet::explain() const { vector< BSONObj > arr; for( PlanSet::const_iterator i = plans_.begin(); i != plans_.end(); ++i ) { - auto_ptr< Cursor > c = (*i)->newCursor(); + shared_ptr c = (*i)->newCursor(); BSONObjBuilder explain; explain.append( "cursor", c->toString() ); - explain.appendArray( "indexBounds", c->prettyIndexBounds() ); + explain.append( "indexBounds", c->prettyIndexBounds() ); arr.push_back( explain.obj() ); } BSONObjBuilder b; b.append( "allPlans", arr ); return b.obj(); } + + QueryPlanSet::PlanPtr QueryPlanSet::getBestGuess() const { + assert( plans_.size() ); + if ( plans_[ 0 ]->scanAndOrderRequired() ){ + for ( unsigned i=1; iscanAndOrderRequired() ) + return plans_[i]; + } + + stringstream ss; + ss << "best guess plan requested, but scan and order required:"; + ss << " query: " << fbs_->simplifiedQuery(); + ss << " order: " << order_; + ss << " choices: "; + for ( unsigned i=0; iindexKey() << " "; + } + + string s = ss.str(); + msgassertedNoTrace( 13284, s.c_str() ); + } + return plans_[0]; + } QueryPlanSet::Runner::Runner( QueryPlanSet &plans, QueryOp &op ) : op_( op ), plans_( plans ) { } + void QueryPlanSet::Runner::mayYield( const vector< shared_ptr< QueryOp > > &ops ) { + if ( plans_._mayYield ) { + if ( plans_._yieldSometimesTracker.ping() ) { + int micros = ClientCursor::yieldSuggest(); + if ( micros > 0 ) { + for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { + if ( !prepareToYield( **i ) ) { + return; + } + } + ClientCursor::staticYield( micros ); + for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { + recoverFromYield( **i ); + } + } + } + } + } + shared_ptr< QueryOp > QueryPlanSet::Runner::run() { massert( 10369 , "no plans", plans_.plans_.size() > 0 ); - if ( plans_.plans_.size() > 1 ) - log(1) << " running multiple plans" << endl; - vector< shared_ptr< QueryOp > > ops; - for( PlanSet::iterator i = plans_.plans_.begin(); i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.clone() ); - op->setQueryPlan( i->get() ); - ops.push_back( op ); + if ( plans_._bestGuessOnly ) { + shared_ptr< QueryOp > op( op_.createChild() ); + op->setQueryPlan( plans_.getBestGuess().get() ); + ops.push_back( op ); + } else { + if ( plans_.plans_.size() > 1 ) + 
log(1) << " running multiple plans" << endl; + for( PlanSet::iterator i = plans_.plans_.begin(); i != plans_.plans_.end(); ++i ) { + shared_ptr< QueryOp > op( op_.createChild() ); + op->setQueryPlan( i->get() ); + ops.push_back( op ); + } } for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { @@ -483,13 +555,16 @@ unsigned errCount = 0; bool first = true; for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { + mayYield( ops ); QueryOp &op = **i; nextOp( op ); if ( op.complete() ) { - if ( first ) + if ( first ) { nScanned += nScannedBackup; - if ( plans_.mayRecordPlan_ && op.mayRecordPlan() ) + } + if ( plans_.mayRecordPlan_ && op.mayRecordPlan() ) { op.qp().registerSelf( nScanned ); + } return *i; } if ( op.error() ) @@ -498,12 +573,12 @@ } if ( errCount == ops.size() ) break; - if ( plans_.usingPrerecordedPlan_ && nScanned > plans_.oldNScanned_ * 10 && plans_._special.empty() ) { + if ( !plans_._bestGuessOnly && plans_.usingPrerecordedPlan_ && nScanned > plans_.oldNScanned_ * 10 && plans_._special.empty() ) { plans_.addOtherPlans( true ); PlanSet::iterator i = plans_.plans_.begin(); ++i; for( ; i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.clone() ); + shared_ptr< QueryOp > op( op_.createChild() ); op->setQueryPlan( i->get() ); ops.push_back( op ); initOp( *op ); @@ -519,27 +594,143 @@ return ops[ 0 ]; } +#define GUARD_OP_EXCEPTION( op, expression ) \ + try { \ + expression; \ + } \ + catch ( DBException& e ) { \ + op.setException( e.getInfo() ); \ + } \ + catch ( const std::exception &e ) { \ + op.setException( ExceptionInfo( e.what() , 0 ) ); \ + } \ + catch ( ... ) { \ + op.setException( ExceptionInfo( "Caught unknown exception" , 0 ) ); \ + } + + void QueryPlanSet::Runner::initOp( QueryOp &op ) { - try { - op.init(); - } catch ( const std::exception &e ) { - op.setExceptionMessage( e.what() ); - } catch ( ... ) { - op.setExceptionMessage( "Caught unknown exception" ); - } + GUARD_OP_EXCEPTION( op, op.init() ); } void QueryPlanSet::Runner::nextOp( QueryOp &op ) { - try { - if ( !op.error() ) - op.next(); - } catch ( const std::exception &e ) { - op.setExceptionMessage( e.what() ); - } catch ( ... ) { - op.setExceptionMessage( "Caught unknown exception" ); - } + GUARD_OP_EXCEPTION( op, if ( !op.error() ) { op.next(); } ); + } + + bool QueryPlanSet::Runner::prepareToYield( QueryOp &op ) { + GUARD_OP_EXCEPTION( op, + if ( op.error() ) { + return true; + } else { + return op.prepareToYield(); + } ); + return true; + } + + void QueryPlanSet::Runner::recoverFromYield( QueryOp &op ) { + GUARD_OP_EXCEPTION( op, if ( !op.error() ) { op.recoverFromYield(); } ); + } + + + MultiPlanScanner::MultiPlanScanner( const char *ns, + const BSONObj &query, + const BSONObj &order, + const BSONElement *hint, + bool honorRecordedPlan, + const BSONObj &min, + const BSONObj &max, + bool bestGuessOnly, + bool mayYield ) : + _ns( ns ), + _or( !query.getField( "$or" ).eoo() ), + _query( query.getOwned() ), + _fros( ns, _query ), + _i(), + _honorRecordedPlan( honorRecordedPlan ), + _bestGuessOnly( bestGuessOnly ), + _hint( ( hint && !hint->eoo() ) ? 
hint->wrap() : BSONObj() ), + _mayYield( mayYield ), + _tableScanned() + { + if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_fros.getSpecial().empty() ) { + _or = false; + } + if ( _or && uselessOr( _hint.firstElement() ) ) { + _or = false; + } + // if _or == false, don't use or clauses for index selection + if ( !_or ) { + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns, _query ) ); + _currentQps.reset( new QueryPlanSet( ns, frs, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); + } else { + BSONElement e = _query.getField( "$or" ); + massert( 13268, "invalid $or spec", e.type() == Array && e.embeddedObject().nFields() > 0 ); + } } + shared_ptr< QueryOp > MultiPlanScanner::runOpOnce( QueryOp &op ) { + massert( 13271, "can't run more ops", mayRunMore() ); + if ( !_or ) { + ++_i; + return _currentQps->runOp( op ); + } + ++_i; + auto_ptr< FieldRangeSet > frs( _fros.topFrs() ); + BSONElement hintElt = _hint.firstElement(); + _currentQps.reset( new QueryPlanSet( _ns, frs, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); + shared_ptr< QueryOp > ret( _currentQps->runOp( op ) ); + if ( ret->qp().willScanTable() ) { + _tableScanned = true; + } + _fros.popOrClause(); + return ret; + } + + shared_ptr< QueryOp > MultiPlanScanner::runOp( QueryOp &op ) { + shared_ptr< QueryOp > ret = runOpOnce( op ); + while( !ret->stopRequested() && mayRunMore() ) { + ret = runOpOnce( *ret ); + } + return ret; + } + + bool MultiPlanScanner::uselessOr( const BSONElement &hint ) const { + NamespaceDetails *nsd = nsdetails( _ns ); + if ( !nsd ) { + return true; + } + IndexDetails *id = 0; + if ( !hint.eoo() ) { + id = parseHint( hint, nsd ); + if ( !id ) { + return true; + } + } + vector< BSONObj > ret; + _fros.allClausesSimplified( ret ); + for( vector< BSONObj >::const_iterator i = ret.begin(); i != ret.end(); ++i ) { + if ( id ) { + if ( id->getSpec().suitability( *i, BSONObj() ) == USELESS ) { + return true; + } + } else { + bool useful = false; + NamespaceDetails::IndexIterator j = nsd->ii(); + while( j.more() ) { + IndexDetails &id = j.next(); + if ( id.getSpec().suitability( *i, BSONObj() ) != USELESS ) { + useful = true; + break; + } + } + if ( !useful ) { + return true; + } + } + } + return false; + } + bool indexWorks( const BSONObj &idxPattern, const BSONObj &sampleKey, int direction, int firstSignificantField ) { BSONObjIterator p( idxPattern ); BSONObjIterator k( sampleKey ); diff -Nru mongodb-1.4.4/db/queryoptimizer.h mongodb-1.6.3/db/queryoptimizer.h --- mongodb-1.4.4/db/queryoptimizer.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/queryoptimizer.h 2010-09-24 10:02:42.000000000 -0700 @@ -21,6 +21,8 @@ #include "cursor.h" #include "jsobj.h" #include "queryutil.h" +#include "matcher.h" +#include "../util/message.h" namespace mongo { @@ -32,6 +34,7 @@ QueryPlan(NamespaceDetails *_d, int _idxNo, // -1 = no index const FieldRangeSet &fbs, + const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey = BSONObj(), const BSONObj &endKey = BSONObj() , @@ -49,32 +52,36 @@ requested sort order */ bool unhelpful() const { return unhelpful_; } int direction() const { return direction_; } - auto_ptr< Cursor > newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const; - auto_ptr< Cursor > newReverseCursor() const; + shared_ptr<Cursor> newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const; + shared_ptr<Cursor> newReverseCursor() const; BSONObj indexKey() const;
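// The auto_ptr -> shared_ptr change to newCursor() above appears to reflect the
// new sharing pattern introduced by this patch: MultiCursor's getMore() handoff
// constructor (below) and a ClientCursor can hold the same Cursor instance, a
// shape that auto_ptr's single-owner transfer semantics could not express.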
+ bool willScanTable() const { return !index_ && fbs_.matchPossible(); } const char *ns() const { return fbs_.ns(); } NamespaceDetails *nsd() const { return d; } - BSONObj query() const { return fbs_.query(); } + BSONObj originalQuery() const { return _originalQuery; } BSONObj simplifiedQuery( const BSONObj& fields = BSONObj() ) const { return fbs_.simplifiedQuery( fields ); } const FieldRange &range( const char *fieldName ) const { return fbs_.range( fieldName ); } void registerSelf( long long nScanned ) const; - // just for testing - BoundList indexBounds() const { return indexBounds_; } + shared_ptr< FieldRangeVector > frv() const { return _frv; } private: NamespaceDetails *d; int idxNo; const FieldRangeSet &fbs_; + const BSONObj &_originalQuery; const BSONObj &order_; const IndexDetails *index_; bool optimal_; bool scanAndOrderRequired_; bool exactKeyMatch_; int direction_; - BoundList indexBounds_; + shared_ptr< FieldRangeVector > _frv; + BSONObj _startKey; + BSONObj _endKey; bool endKeyInclusive_; bool unhelpful_; string _special; IndexType * _type; + bool _startOrEndSpec; }; // Inherit from this interface to implement a new query operation. @@ -82,61 +89,113 @@ // each clone its own query plan. class QueryOp { public: - QueryOp() : complete_(), qp_(), error_() {} + QueryOp() : _complete(), _stopRequested(), _qp(), _error() {} + + // Used when handing off from one QueryOp type to another + QueryOp( const QueryOp &other ) : + _complete(), _stopRequested(), _qp(), _error(), _matcher( other._matcher ), + _orConstraint( other._orConstraint ) {} + virtual ~QueryOp() {} - /** this gets called after a query plan is set? ERH 2/16/10 */ - virtual void init() = 0; + /** this gets called after a query plan is set */ + void init() { + if ( _oldMatcher.get() ) { + _matcher.reset( _oldMatcher->nextClauseMatcher( qp().indexKey() ) ); + } else { + _matcher.reset( new CoveredIndexMatcher( qp().originalQuery(), qp().indexKey(), alwaysUseRecord() ) ); + } + _init(); + } virtual void next() = 0; + virtual bool mayRecordPlan() const = 0; + virtual bool prepareToYield() { massert( 13335, "yield not supported", false ); return false; } + virtual void recoverFromYield() { massert( 13336, "yield not supported", false ); } + /** @return a copy of the inheriting class, which will be run with its own - query plan. + query plan. If multiple plan sets are required for an $or query, + the QueryOp of the winning plan from a given set will be cloned + to generate QueryOps for the subsequent plan set. This function + should only be called after the query op has completed executing. */ - virtual QueryOp *clone() const = 0; - bool complete() const { return complete_; } - bool error() const { return error_; } - string exceptionMessage() const { return exceptionMessage_; } - const QueryPlan &qp() const { return *qp_; } + QueryOp *createChild() { + if( _orConstraint.get() ) { + _matcher->advanceOrClause( _orConstraint ); + _orConstraint.reset(); + } + QueryOp *ret = _createChild(); + ret->_oldMatcher = _matcher; + return ret; + } + bool complete() const { return _complete; } + bool error() const { return _error; } + bool stopRequested() const { return _stopRequested; } + ExceptionInfo exception() const { return _exception; } + const QueryPlan &qp() const { return *_qp; } // To be called by QueryPlanSet::Runner only.
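// setQueryPlan() binds the op to the plan it will execute; setException() records
// a failure on this op alone, so the Runner can keep racing the sibling plans
// (see the errCount handling in Runner::run() earlier in this patch) instead of
// unwinding the whole query.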
- void setQueryPlan( const QueryPlan *qp ) { qp_ = qp; } - void setExceptionMessage( const string &exceptionMessage ) { - error_ = true; - exceptionMessage_ = exceptionMessage; + void setQueryPlan( const QueryPlan *qp ) { _qp = qp; } + void setException( const DBException &e ) { + _error = true; + _exception = e.getInfo(); } + shared_ptr< CoveredIndexMatcher > matcher() const { return _matcher; } protected: - void setComplete() { complete_ = true; } + void setComplete() { + _orConstraint = qp().frv(); + _complete = true; + } + void setStop() { setComplete(); _stopRequested = true; } + + virtual void _init() = 0; + + virtual QueryOp *_createChild() const = 0; + + virtual bool alwaysUseRecord() const { return false; } + private: - bool complete_; - string exceptionMessage_; - const QueryPlan *qp_; - bool error_; + bool _complete; + bool _stopRequested; + ExceptionInfo _exception; + const QueryPlan *_qp; + bool _error; + shared_ptr< CoveredIndexMatcher > _matcher; + shared_ptr< CoveredIndexMatcher > _oldMatcher; + shared_ptr< FieldRangeVector > _orConstraint; }; // Set of candidate query plans for a particular query. Used for running // a QueryOp on these plans. class QueryPlanSet { public: + + typedef boost::shared_ptr< QueryPlan > PlanPtr; + typedef vector< PlanPtr > PlanSet; + QueryPlanSet( const char *ns, - const BSONObj &query, + auto_ptr< FieldRangeSet > frs, + const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint = 0, bool honorRecordedPlan = true, const BSONObj &min = BSONObj(), - const BSONObj &max = BSONObj() ); + const BSONObj &max = BSONObj(), + bool bestGuessOnly = false, + bool mayYield = false); int nPlans() const { return plans_.size(); } shared_ptr< QueryOp > runOp( QueryOp &op ); template< class T > shared_ptr< T > runOp( T &op ) { return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) ); } - const FieldRangeSet &fbs() const { return fbs_; } BSONObj explain() const; bool usingPrerecordedPlan() const { return usingPrerecordedPlan_; } + PlanPtr getBestGuess() const; + //for testing + const FieldRangeSet &fbs() const { return *fbs_; } private: void addOtherPlans( bool checkFirst ); - typedef boost::shared_ptr< QueryPlan > PlanPtr; - typedef vector< PlanPtr > PlanSet; void addPlan( PlanPtr plan, bool checkFirst ) { if ( checkFirst && plan->indexKey().woCompare( plans_[ 0 ]->indexKey() ) == 0 ) return; @@ -147,14 +206,17 @@ struct Runner { Runner( QueryPlanSet &plans, QueryOp &op ); shared_ptr< QueryOp > run(); + void mayYield( const vector< shared_ptr< QueryOp > > &ops ); QueryOp &op_; QueryPlanSet &plans_; static void initOp( QueryOp &op ); static void nextOp( QueryOp &op ); + static bool prepareToYield( QueryOp &op ); + static void recoverFromYield( QueryOp &op ); }; const char *ns; - BSONObj query_; - FieldRangeSet fbs_; + BSONObj _originalQuery; + auto_ptr< FieldRangeSet > fbs_; PlanSet plans_; bool mayRecordPlan_; bool usingPrerecordedPlan_; @@ -165,16 +227,196 @@ BSONObj min_; BSONObj max_; string _special; + bool _bestGuessOnly; + bool _mayYield; + ElapsedTracker _yieldSometimesTracker; }; + // Handles $or type queries by generating a QueryPlanSet for each $or clause + // NOTE on our $or implementation: In our current qo implementation we don't + // keep statistics on our data, but we can conceptualize the problem of + // selecting an index when statistics exist for all index ranges. 
The + // d-hitting set problem on k sets and n elements can be reduced to the + // problem of index selection on k $or clauses and n index ranges (where + // d is the max number of indexes, and the number of ranges n is unbounded). + // In light of the fact that d-hitting set is np complete, and we don't even + // track statistics (so cost calculations are expensive) our first + // implementation uses the following greedy approach: We take one $or clause + // at a time and treat each as a separate query for index selection purposes. + // But if an index range is scanned for a particular $or clause, we eliminate + // that range from all subsequent clauses. One could imagine an opposite + // implementation where we select indexes based on the union of index ranges + // for all $or clauses, but this can have much poorer worst case behavior. + // (An index range that suits one $or clause may not suit another, and this + // is worse than the typical case of index range choice staleness because + // with $or the clauses may likely be logically distinct.) The greedy + // implementation won't do any worse than all the $or clauses individually, + // and it can often do better. In the first cut we are intentionally using + // QueryPattern tracking to record successful plans on $or clauses for use by + // subsequent $or clauses, even though there may be a significant aggregate + // $nor component that would not be represented in QueryPattern. + class MultiPlanScanner { + public: + MultiPlanScanner( const char *ns, + const BSONObj &query, + const BSONObj &order, + const BSONElement *hint = 0, + bool honorRecordedPlan = true, + const BSONObj &min = BSONObj(), + const BSONObj &max = BSONObj(), + bool bestGuessOnly = false, + bool mayYield = false); + shared_ptr< QueryOp > runOp( QueryOp &op ); + template< class T > + shared_ptr< T > runOp( T &op ) { + return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) ); + } + shared_ptr< QueryOp > runOpOnce( QueryOp &op ); + template< class T > + shared_ptr< T > runOpOnce( T &op ) { + return dynamic_pointer_cast< T >( runOpOnce( static_cast< QueryOp& >( op ) ) ); + } + bool mayRunMore() const { return _or ? 
( !_tableScanned && !_fros.orFinished() ) : _i == 0; } + BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); } + // just report this when only one query op + bool usingPrerecordedPlan() const { + return !_or && _currentQps->usingPrerecordedPlan(); + } + void setBestGuessOnly() { _bestGuessOnly = true; } + void mayYield( bool val ) { _mayYield = val; } + private: + void assertNotOr() const { + massert( 13266, "not implemented for $or query", !_or ); + } + bool uselessOr( const BSONElement &hint ) const; + const char * _ns; + bool _or; + BSONObj _query; + FieldRangeOrSet _fros; + auto_ptr< QueryPlanSet > _currentQps; + int _i; + bool _honorRecordedPlan; + bool _bestGuessOnly; + BSONObj _hint; + bool _mayYield; + bool _tableScanned; + }; + + class MultiCursor : public Cursor { + public: + class CursorOp : public QueryOp { + public: + CursorOp() {} + CursorOp( const QueryOp &other ) : QueryOp( other ) {} + virtual shared_ptr< Cursor > newCursor() const = 0; + }; + // takes ownership of 'op' + MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr< CursorOp > op = shared_ptr< CursorOp >(), bool mayYield = false ) + : _mps( new MultiPlanScanner( ns, pattern, order, 0, true, BSONObj(), BSONObj(), !op.get(), mayYield ) ) { + if ( op.get() ) { + _op = op; + } else { + _op.reset( new NoOp() ); + } + if ( _mps->mayRunMore() ) { + nextClause(); + if ( !ok() ) { + advance(); + } + } else { + _c.reset( new BasicCursor( DiskLoc() ) ); + } + } + // used to handoff a query to a getMore() + MultiCursor( auto_ptr< MultiPlanScanner > mps, const shared_ptr< Cursor > &c, const shared_ptr< CoveredIndexMatcher > &matcher, const QueryOp &op ) + : _op( new NoOp( op ) ), _c( c ), _mps( mps ), _matcher( matcher ) { + _mps->setBestGuessOnly(); + _mps->mayYield( false ); // with a NoOp, there's no need to yield in QueryPlanSet + if ( !ok() ) { + // would have been advanced by UserQueryOp if possible + advance(); + } + } + virtual bool ok() { return _c->ok(); } + virtual Record* _current() { return _c->_current(); } + virtual BSONObj current() { return _c->current(); } + virtual DiskLoc currLoc() { return _c->currLoc(); } + virtual bool advance() { + _c->advance(); + while( !ok() && _mps->mayRunMore() ) { + nextClause(); + } + return ok(); + } + virtual BSONObj currKey() const { return _c->currKey(); } + virtual DiskLoc refLoc() { return _c->refLoc(); } + virtual void noteLocation() { + _c->noteLocation(); + } + virtual void checkLocation() { + _c->checkLocation(); + } + virtual bool supportGetMore() { return true; } + virtual bool supportYields() { return _c->supportYields(); } + // with update we could potentially get the same document on multiple + // indexes, but update appears to already handle this with seenObjects + // so we don't have to do anything special here. + virtual bool getsetdup(DiskLoc loc) { + return _c->getsetdup( loc ); + } + virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } + // just for testing + shared_ptr< Cursor > sub_c() const { return _c; } + private: + class NoOp : public CursorOp { + public: + NoOp() {} + NoOp( const QueryOp &other ) : CursorOp( other ) {} + virtual void _init() { setComplete(); } + virtual void next() {} + virtual bool mayRecordPlan() const { return false; } + virtual QueryOp *_createChild() const { return new NoOp(); } + virtual shared_ptr< Cursor > newCursor() const { return qp().newCursor(); } + }; + void nextClause() { + shared_ptr< CursorOp > best = _mps->runOpOnce( *_op ); + if ( ! 
best->complete() ) + throw MsgAssertionException( best->exception() ); + _c = best->newCursor(); + _matcher = best->matcher(); + _op = best; + } + shared_ptr< CursorOp > _op; + shared_ptr< Cursor > _c; + auto_ptr< MultiPlanScanner > _mps; + shared_ptr< CoveredIndexMatcher > _matcher; + }; + // NOTE min, max, and keyPattern will be updated to be consistent with the selected index. IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ); inline bool isSimpleIdQuery( const BSONObj& query ){ - return - strcmp( query.firstElement().fieldName() , "_id" ) == 0 && - query.nFields() == 1 && - query.firstElement().isSimpleType(); + BSONObjIterator i(query); + if( !i.more() ) return false; + BSONElement e = i.next(); + if( i.more() ) return false; + if( strcmp("_id", e.fieldName()) != 0 ) return false; + return e.isSimpleType(); // e.g. not something like { _id : { $gt : ... + } + + // matcher() will always work on the returned cursor + inline shared_ptr< Cursor > bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ) { + if( !query.getField( "$or" ).eoo() ) { + return shared_ptr< Cursor >( new MultiCursor( ns, query, sort ) ); + } else { + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns, query ) ); + shared_ptr< Cursor > ret = QueryPlanSet( ns, frs, query, sort ).getBestGuess()->newCursor(); + if ( !query.isEmpty() ) { + shared_ptr< CoveredIndexMatcher > matcher( new CoveredIndexMatcher( query, ret->indexKeyPattern() ) ); + ret->setMatcher( matcher ); + } + return ret; + } } } // namespace mongo diff -Nru mongodb-1.4.4/db/queryutil.cpp mongodb-1.6.3/db/queryutil.cpp --- mongodb-1.4.4/db/queryutil.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/queryutil.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,15 +15,18 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "btree.h" #include "matcher.h" #include "pdfile.h" #include "queryoptimizer.h" #include "../util/unittest.h" +#include "dbmessage.h" namespace mongo { + extern BSONObj staticNull; + /** returns a string that when used as a matcher, would match a super set of regex() returns "" for complex regular expressions used to optimize queries in some simple regex cases that start with '^' @@ -35,11 +38,25 @@ if (purePrefix) *purePrefix = false; + bool multilineOK; + if ( regex[0] == '\\' && regex[1] == 'A'){ + multilineOK = true; + regex += 2; + } else if (regex[0] == '^') { + multilineOK = false; + regex += 1; + } else { + return r; + } + bool extended = false; while (*flags){ switch (*(flags++)){ case 'm': // multiline - continue; + if (multilineOK) + continue; + else + return r; case 'x': // extended extended = true; break; @@ -48,9 +65,6 @@ } } - if ( *(regex++) != '^' ) - return r; - stringstream ss; while(*regex){ @@ -131,7 +145,7 @@ } for( set< BSONElement, element_lt >::const_iterator i = vals.begin(); i != vals.end(); ++i ) - intervals_.push_back( FieldInterval(*i) ); + _intervals.push_back( FieldInterval(*i) ); for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i ) *this |= *i; @@ -141,25 +155,25 @@ if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ){ - intervals_.push_back( FieldInterval(e) ); + _intervals.push_back( FieldInterval(e) ); const BSONElement& temp = e.embeddedObject().firstElement(); if ( ! 
temp.eoo() ){ if ( temp < e ) - intervals_.insert( intervals_.begin() , temp ); + _intervals.insert( _intervals.begin() , temp ); else - intervals_.push_back( FieldInterval(temp) ); + _intervals.push_back( FieldInterval(temp) ); } return; } - intervals_.push_back( FieldInterval() ); - FieldInterval &initial = intervals_[ 0 ]; - BSONElement &lower = initial.lower_.bound_; - bool &lowerInclusive = initial.lower_.inclusive_; - BSONElement &upper = initial.upper_.bound_; - bool &upperInclusive = initial.upper_.inclusive_; + _intervals.push_back( FieldInterval() ); + FieldInterval &initial = _intervals[ 0 ]; + BSONElement &lower = initial._lower._bound; + bool &lowerInclusive = initial._lower._inclusive; + BSONElement &upper = initial._upper._bound; + bool &upperInclusive = initial._upper._inclusive; lower = minKey.firstElement(); lowerInclusive = true; upper = maxKey.firstElement(); @@ -190,13 +204,13 @@ // regex matches self - regex type > string type if (e.type() == RegEx){ BSONElement re = addObj( BSON( "" << e ) ).firstElement(); - intervals_.push_back( FieldInterval(re) ); + _intervals.push_back( FieldInterval(re) ); } else { BSONObj orig = e.embeddedObject(); BSONObjBuilder b; b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe()); BSONElement re = addObj( b.obj() ).firstElement(); - intervals_.push_back( FieldInterval(re) ); + _intervals.push_back( FieldInterval(re) ); } } @@ -334,63 +348,67 @@ } + void FieldRange::finishOperation( const vector< FieldInterval > &newIntervals, const FieldRange &other ) { + _intervals = newIntervals; + for( vector< BSONObj >::const_iterator i = other._objData.begin(); i != other._objData.end(); ++i ) + _objData.push_back( *i ); + if ( _special.size() == 0 && other._special.size() ) + _special = other._special; + } + // as called, these functions find the max/min of a bound in the // opposite direction, so inclusive bounds are considered less // superlative FieldBound maxFieldBound( const FieldBound &a, const FieldBound &b ) { - int cmp = a.bound_.woCompare( b.bound_, false ); - if ( ( cmp == 0 && !b.inclusive_ ) || cmp < 0 ) + int cmp = a._bound.woCompare( b._bound, false ); + if ( ( cmp == 0 && !b._inclusive ) || cmp < 0 ) return b; return a; } FieldBound minFieldBound( const FieldBound &a, const FieldBound &b ) { - int cmp = a.bound_.woCompare( b.bound_, false ); - if ( ( cmp == 0 && !b.inclusive_ ) || cmp > 0 ) + int cmp = a._bound.woCompare( b._bound, false ); + if ( ( cmp == 0 && !b._inclusive ) || cmp > 0 ) return b; return a; } bool fieldIntervalOverlap( const FieldInterval &one, const FieldInterval &two, FieldInterval &result ) { - result.lower_ = maxFieldBound( one.lower_, two.lower_ ); - result.upper_ = minFieldBound( one.upper_, two.upper_ ); - return result.valid(); + result._lower = maxFieldBound( one._lower, two._lower ); + result._upper = minFieldBound( one._upper, two._upper ); + return result.strictValid(); } // NOTE Not yet tested for complex $or bounds, just for simple bounds generated by $in const FieldRange &FieldRange::operator&=( const FieldRange &other ) { vector< FieldInterval > newIntervals; - vector< FieldInterval >::const_iterator i = intervals_.begin(); - vector< FieldInterval >::const_iterator j = other.intervals_.begin(); - while( i != intervals_.end() && j != other.intervals_.end() ) { + vector< FieldInterval >::const_iterator i = _intervals.begin(); + vector< FieldInterval >::const_iterator j = other._intervals.begin(); + while( i != _intervals.end() && j != other._intervals.end() ) { 
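// Sweep the two sorted interval lists in parallel: keep the overlap (if any) of
// the current pair, then advance whichever interval ends first, since it cannot
// overlap anything later in the other list.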
FieldInterval overlap; if ( fieldIntervalOverlap( *i, *j, overlap ) ) newIntervals.push_back( overlap ); - if ( i->upper_ == minFieldBound( i->upper_, j->upper_ ) ) + if ( i->_upper == minFieldBound( i->_upper, j->_upper ) ) ++i; else ++j; } - intervals_ = newIntervals; - for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i ) - objData_.push_back( *i ); - if ( _special.size() == 0 && other._special.size() ) - _special = other._special; + finishOperation( newIntervals, other ); return *this; } void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) { - if ( low.bound_.eoo() ) { - low = lower.lower_; high = lower.upper_; + if ( low._bound.eoo() ) { + low = lower._lower; high = lower._upper; } else { - if ( high.bound_.woCompare( lower.lower_.bound_, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way + if ( high._bound.woCompare( lower._lower._bound, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way FieldInterval tmp; - tmp.lower_ = low; - tmp.upper_ = high; + tmp._lower = low; + tmp._upper = high; newIntervals.push_back( tmp ); - low = lower.lower_; high = lower.upper_; + low = lower._lower; high = lower._upper; } else { - high = lower.upper_; + high = lower._upper; } } } @@ -399,11 +417,11 @@ vector< FieldInterval > newIntervals; FieldBound low; FieldBound high; - vector< FieldInterval >::const_iterator i = intervals_.begin(); - vector< FieldInterval >::const_iterator j = other.intervals_.begin(); - while( i != intervals_.end() && j != other.intervals_.end() ) { - int cmp = i->lower_.bound_.woCompare( j->lower_.bound_, false ); - if ( ( cmp == 0 && i->lower_.inclusive_ ) || cmp < 0 ) { + vector< FieldInterval >::const_iterator i = _intervals.begin(); + vector< FieldInterval >::const_iterator j = other._intervals.begin(); + while( i != _intervals.end() && j != other._intervals.end() ) { + int cmp = i->_lower._bound.woCompare( j->_lower._bound, false ); + if ( ( cmp == 0 && i->_lower._inclusive ) || cmp < 0 ) { handleInterval( *i, low, high, newIntervals ); ++i; } else { @@ -411,34 +429,85 @@ ++j; } } - while( i != intervals_.end() ) { + while( i != _intervals.end() ) { handleInterval( *i, low, high, newIntervals ); ++i; } - while( j != other.intervals_.end() ) { + while( j != other._intervals.end() ) { handleInterval( *j, low, high, newIntervals ); ++j; } FieldInterval tmp; - tmp.lower_ = low; - tmp.upper_ = high; + tmp._lower = low; + tmp._upper = high; newIntervals.push_back( tmp ); - intervals_ = newIntervals; - for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i ) - objData_.push_back( *i ); - if ( _special.size() == 0 && other._special.size() ) - _special = other._special; + finishOperation( newIntervals, other ); + return *this; + } + + const FieldRange &FieldRange::operator-=( const FieldRange &other ) { + vector< FieldInterval >::iterator i = _intervals.begin(); + vector< FieldInterval >::const_iterator j = other._intervals.begin(); + while( i != _intervals.end() && j != other._intervals.end() ) { + int cmp = i->_lower._bound.woCompare( j->_lower._bound, false ); + if ( cmp < 0 || + ( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) { + int cmp2 = i->_upper._bound.woCompare( j->_lower._bound, false ); + if ( cmp2 < 0 ) { + ++i; + } 
else if ( cmp2 == 0 ) { + if ( i->_upper._inclusive && j->_lower._inclusive ) { + i->_upper._inclusive = false; + } + ++i; + } else { + int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false ); + if ( cmp3 < 0 || + ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { + i->_upper = j->_lower; + i->_upper.flipInclusive(); + ++i; + } else { + ++j; + } + } + } else { + int cmp2 = i->_lower._bound.woCompare( j->_upper._bound, false ); + if ( cmp2 > 0 || + ( cmp2 == 0 && ( !i->_lower._inclusive || !j->_lower._inclusive ) ) ) { + ++j; + } else { + int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false ); + if ( cmp3 < 0 || + ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { + i = _intervals.erase( i ); + } else { + i->_lower = j->_upper; + i->_lower.flipInclusive(); + ++j; + } + } + } + } + finishOperation( _intervals, other ); return *this; } + // TODO write a proper implementation that doesn't do a full copy + bool FieldRange::operator<=( const FieldRange &other ) { + FieldRange temp = *this; + temp -= other; + return temp.empty(); + } + BSONObj FieldRange::addObj( const BSONObj &o ) { - objData_.push_back( o ); + _objData.push_back( o ); return o; } - + string FieldRangeSet::getSpecial() const { string s = ""; - for ( map<string,FieldRange>::iterator i=ranges_.begin(); i!=ranges_.end(); i++ ){ + for ( map<string,FieldRange>::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ){ if ( i->second.getSpecial().size() == 0 ) continue; uassert( 13033 , "can't have 2 special fields" , s.size() == 0 ); @@ -472,64 +541,99 @@ int op3 = getGtLtOp( h ); if ( op3 == BSONObj::Equality ){ - ranges_[ fullname ] &= FieldRange( h , isNot , optimize ); + _ranges[ fullname ] &= FieldRange( h , isNot , optimize ); } else { BSONObjIterator l( h.embeddedObject() ); while ( l.more() ){ - ranges_[ fullname ] &= FieldRange( l.next() , isNot , optimize ); + _ranges[ fullname ] &= FieldRange( l.next() , isNot , optimize ); } } } } else { - ranges_[ fieldName ] &= FieldRange( f , isNot , optimize ); + _ranges[ fieldName ] &= FieldRange( f , isNot , optimize ); } } + void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) { + bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); + if ( equality && e.type() == Object ) { + equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 ); + } + + if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) { + _ranges[ e.fieldName() ] &= FieldRange( e , false , optimize ); + } + if ( !equality ) { + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + BSONElement f = j.next(); + if ( strcmp( f.fieldName(), "$not" ) == 0 ) { + switch( f.type() ) { + case Object: { + BSONObjIterator k( f.embeddedObject() ); + while( k.more() ) { + BSONElement g = k.next(); + uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality ); + processOpElement( e.fieldName(), g, true, optimize ); + } + break; + } + case RegEx: + processOpElement( e.fieldName(), f, true, optimize ); + break; + default: + uassert( 13041, "invalid use of $not", false ); + } + } else { + processOpElement( e.fieldName(), f, false, optimize ); + } + } + } + } + FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize ) - : ns_( ns ), query_( query.getOwned() ) { - BSONObjIterator i( query_ ); + : _ns( ns ), _queries( 1, query.getOwned() ) { + BSONObjIterator i( _queries[ 0 ] ); + + while( i.more() ) { + BSONElement e = i.next(); + // e could be x:1 or x:{$gt:1} + + if ( strcmp(
e.fieldName(), "$where" ) == 0 ) { + continue; + } + + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + continue; + } + + if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { + continue; + } + + processQueryField( e, optimize ); + } + } + + FieldRangeOrSet::FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize ) + : _baseSet( ns, query, optimize ), _orFound() { + + BSONObjIterator i( _baseSet._queries[ 0 ] ); while( i.more() ) { BSONElement e = i.next(); - // e could be x:1 or x:{$gt:1} - - if ( strcmp( e.fieldName(), "$where" ) == 0 ) + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 ); + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + BSONElement f = j.next(); + massert( 13263, "$or array must contain objects", f.type() == Object ); + _orSets.push_back( FieldRangeSet( ns, f.embeddedObject(), optimize ) ); + massert( 13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty() ); + } + _orFound = true; continue; - - bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); - if ( equality && e.type() == Object ) { - equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 ); - } - - if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) { - ranges_[ e.fieldName() ] &= FieldRange( e , false , optimize ); - } - if ( !equality ) { - BSONObjIterator j( e.embeddedObject() ); - while( j.more() ) { - BSONElement f = j.next(); - if ( strcmp( f.fieldName(), "$not" ) == 0 ) { - switch( f.type() ) { - case Object: { - BSONObjIterator k( f.embeddedObject() ); - while( k.more() ) { - BSONElement g = k.next(); - uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality ); - processOpElement( e.fieldName(), g, true, optimize ); - } - break; - } - case RegEx: - processOpElement( e.fieldName(), f, true, optimize ); - break; - default: - uassert( 13041, "invalid use of $not", false ); - } - } else { - processOpElement( e.fieldName(), f, false, optimize ); - } - } } } } @@ -545,8 +649,8 @@ BSONObj fields = _fields; if ( fields.isEmpty() ) { BSONObjBuilder b; - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) { - b.append( i->first.c_str(), 1 ); + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + b.append( i->first, 1 ); } fields = b.obj(); } @@ -555,17 +659,19 @@ while( i.more() ) { BSONElement e = i.next(); const char *name = e.fieldName(); - const FieldRange &range = ranges_[ name ]; + const FieldRange &range = _ranges[ name ]; assert( !range.empty() ); if ( range.equality() ) b.appendAs( range.min(), name ); else if ( range.nontrivial() ) { + BSONObj o; BSONObjBuilder c; if ( range.min().type() != MinKey ) c.appendAs( range.min(), range.minInclusive() ? "$gte" : "$gt" ); if ( range.max().type() != MaxKey ) c.appendAs( range.max(), range.maxInclusive() ? 
"$lte" : "$lt" ); - b.append( name, c.done() ); + o = c.obj(); + b.append( name, o ); } } return b.obj(); @@ -573,58 +679,73 @@ QueryPattern FieldRangeSet::pattern( const BSONObj &sort ) const { QueryPattern qp; - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) { + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { assert( !i->second.empty() ); if ( i->second.equality() ) { - qp.fieldTypes_[ i->first ] = QueryPattern::Equality; + qp._fieldTypes[ i->first ] = QueryPattern::Equality; } else if ( i->second.nontrivial() ) { bool upper = i->second.max().type() != MaxKey; bool lower = i->second.min().type() != MinKey; if ( upper && lower ) - qp.fieldTypes_[ i->first ] = QueryPattern::UpperAndLowerBound; + qp._fieldTypes[ i->first ] = QueryPattern::UpperAndLowerBound; else if ( upper ) - qp.fieldTypes_[ i->first ] = QueryPattern::UpperBound; + qp._fieldTypes[ i->first ] = QueryPattern::UpperBound; else if ( lower ) - qp.fieldTypes_[ i->first ] = QueryPattern::LowerBound; + qp._fieldTypes[ i->first ] = QueryPattern::LowerBound; } } qp.setSort( sort ); return qp; } + // TODO get rid of this BoundList FieldRangeSet::indexBounds( const BSONObj &keyPattern, int direction ) const { - BSONObjBuilder equalityBuilder; typedef vector< pair< shared_ptr< BSONObjBuilder >, shared_ptr< BSONObjBuilder > > > BoundBuilders; BoundBuilders builders; + builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); BSONObjIterator i( keyPattern ); + bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds while( i.more() ) { BSONElement e = i.next(); const FieldRange &fr = range( e.fieldName() ); int number = (int) e.number(); // returns 0.0 if not numeric bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 
1 : -1 ) > 0 ); - if ( builders.empty() ) { + if ( !ineq ) { if ( fr.equality() ) { - equalityBuilder.appendAs( fr.min(), "" ); + for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { + j->first->appendAs( fr.min(), "" ); + j->second->appendAs( fr.min(), "" ); + } } else { - BSONObj equalityObj = equalityBuilder.done(); + if ( !fr.inQuery() ) { + ineq = true; + } + BoundBuilders newBuilders; const vector< FieldInterval > &intervals = fr.intervals(); - if ( forward ) { - for( vector< FieldInterval >::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) { - builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); - builders.back().first->appendElements( equalityObj ); - builders.back().second->appendElements( equalityObj ); - builders.back().first->appendAs( j->lower_.bound_, "" ); - builders.back().second->appendAs( j->upper_.bound_, "" ); + for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) { + BSONObj first = i->first->obj(); + BSONObj second = i->second->obj(); + if ( forward ) { + for( vector< FieldInterval >::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) { + uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 ); + newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); + newBuilders.back().first->appendElements( first ); + newBuilders.back().second->appendElements( second ); + newBuilders.back().first->appendAs( j->_lower._bound, "" ); + newBuilders.back().second->appendAs( j->_upper._bound, "" ); + } + } else { + for( vector< FieldInterval >::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) { + uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 ); + newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); + newBuilders.back().first->appendElements( first ); + newBuilders.back().second->appendElements( second ); + newBuilders.back().first->appendAs( j->_upper._bound, "" ); + newBuilders.back().second->appendAs( j->_lower._bound, "" ); + } } - } else { - for( vector< FieldInterval >::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) { - builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); - builders.back().first->appendElements( equalityObj ); - builders.back().second->appendElements( equalityObj ); - builders.back().first->appendAs( j->upper_.bound_, "" ); - builders.back().second->appendAs( j->lower_.bound_, "" ); - } } + builders = newBuilders; } } else { for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { @@ -633,19 +754,12 @@ } } } - if ( builders.empty() ) { - BSONObj equalityObj = equalityBuilder.done(); - assert( !equalityObj.isEmpty() ); - builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); - builders.back().first->appendElements( equalityObj ); - builders.back().second->appendElements( equalityObj ); - } BoundList ret; for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) ret.push_back( make_pair( 
i->first->obj(), i->second->obj() ) ); return ret; - } - + } + /////////////////// // FieldMatcher // /////////////////// @@ -658,16 +772,51 @@ int true_false = -1; while ( i.more() ){ BSONElement e = i.next(); - add (e.fieldName(), e.trueValue()); - // validate input - if (true_false == -1){ - true_false = e.trueValue(); - _include = !e.trueValue(); - } - else{ - uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." , - (bool)true_false == e.trueValue() ); + if (e.type() == Object){ + BSONObj obj = e.embeddedObject(); + BSONElement e2 = obj.firstElement(); + if ( strcmp(e2.fieldName(), "$slice") == 0 ){ + if (e2.isNumber()){ + int i = e2.numberInt(); + if (i < 0) + add(e.fieldName(), i, -i); // limit is now positive + else + add(e.fieldName(), 0, i); + + } else if (e2.type() == Array) { + BSONObj arr = e2.embeddedObject(); + uassert(13099, "$slice array wrong size", arr.nFields() == 2 ); + + BSONObjIterator it(arr); + int skip = it.next().numberInt(); + int limit = it.next().numberInt(); + uassert(13100, "$slice limit must be positive", limit > 0 ); + add(e.fieldName(), skip, limit); + + } else { + uassert(13098, "$slice only supports numbers and [skip, limit] arrays", false); + } + } else { + uassert(13097, string("Unsupported projection option: ") + obj.firstElement().fieldName(), false); + } + + } else if (!strcmp(e.fieldName(), "_id") && !e.trueValue()){ + _includeID = false; + + } else { + + add (e.fieldName(), e.trueValue()); + + // validate input + if (true_false == -1){ + true_false = e.trueValue(); + _include = !e.trueValue(); + } + else{ + uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." , + (bool)true_false == e.trueValue() ); + } } } } @@ -676,34 +825,71 @@ if (field.empty()){ // this is the field the user referred to _include = include; } else { + _include = !include; + const size_t dot = field.find('.'); const string subfield = field.substr(0,dot); const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); boost::shared_ptr<FieldMatcher>& fm = _fields[subfield]; if (!fm) - fm.reset(new FieldMatcher(!include)); + fm.reset(new FieldMatcher()); fm->add(rest, include); } } + void FieldMatcher::add(const string& field, int skip, int limit){ + _special = true; // can't include or exclude whole object + + if (field.empty()){ // this is the field the user referred to + _skip = skip; + _limit = limit; + } else { + const size_t dot = field.find('.'); + const string subfield = field.substr(0,dot); + const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); + + boost::shared_ptr<FieldMatcher>& fm = _fields[subfield]; + if (!fm) + fm.reset(new FieldMatcher()); + + fm->add(rest, skip, limit); + } + } + BSONObj FieldMatcher::getSpec() const{ return _source; } //b will be the value part of an array-typed BSONElement - void FieldMatcher::appendArray( BSONObjBuilder& b , const BSONObj& a ) const { + void FieldMatcher::appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested) const { + int skip = nested ? 0 : _skip; + int limit = nested ?
-1 : _limit; + + if (skip < 0){ + skip = max(0, skip + a.nFields()); + } + int i=0; BSONObjIterator it(a); while (it.more()){ BSONElement e = it.next(); + if (skip){ + skip--; + continue; + } + + if (limit != -1 && (limit-- == 0)){ + break; + } + switch(e.type()){ case Array:{ BSONObjBuilder subb; - appendArray(subb , e.embeddedObject()); - b.appendArray(b.numStr(i++).c_str(), subb.obj()); + appendArray(subb , e.embeddedObject(), true); + b.appendArray(b.numStr(i++), subb.obj()); break; } case Object:{ @@ -717,10 +903,8 @@ } default: if (_include) - b.appendAs(e, b.numStr(i++).c_str()); + b.appendAs(e, b.numStr(i++)); } - - } } @@ -734,7 +918,7 @@ else { FieldMatcher& subfm = *field->second; - if (subfm._fields.empty() || !(e.type()==Object || e.type()==Array) ){ + if ((subfm._fields.empty() && !subfm._special) || !(e.type()==Object || e.type()==Array) ){ if (subfm._include) b.append(e); } @@ -755,6 +939,193 @@ } } + bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const { + int l = matchingLowElement( e, i, forward ); + return ( l % 2 == 0 ); // if we're inside an interval + } + + // binary search for interval containing the specified element + // an even return value indicates that the element is contained within a valid interval + int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward ) const { + int l = -1; + int h = _ranges[ i ].intervals().size() * 2; + while( l + 1 < h ) { + int m = ( l + h ) / 2; + BSONElement toCmp; + if ( m % 2 == 0 ) { + toCmp = _ranges[ i ].intervals()[ m / 2 ]._lower._bound; + } else { + toCmp = _ranges[ i ].intervals()[ m / 2 ]._upper._bound; + } + int cmp = toCmp.woCompare( e, false ); + if ( !forward ) { + cmp = -cmp; + } + if ( cmp < 0 ) { + l = m; + } else if ( cmp > 0 ) { + h = m; + } else { + return ( m % 2 == 0 ) ? m : m - 1; + } + } + assert( l + 1 == h ); + return l; + } + + bool FieldRangeVector::matches( const BSONObj &obj ) const { + BSONObjIterator k( _keyPattern ); + for( int i = 0; i < (int)_ranges.size(); ++i ) { + if ( _ranges[ i ].empty() ) { + return false; + } + BSONElement kk = k.next(); + int number = (int) kk.number(); + bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 
1 : -1 ) > 0; + BSONElement e = obj.getField( kk.fieldName() ); + if ( e.eoo() ) { + e = staticNull.firstElement(); + } + if ( e.type() == Array ) { + BSONObjIterator j( e.embeddedObject() ); + bool match = false; + while( j.more() ) { + if ( matchesElement( j.next(), i, forward ) ) { + match = true; + break; + } + } + if ( !match ) { + return false; + } + } else if ( !matchesElement( e, i, forward ) ) { + return false; + } + } + return true; + } + + // TODO optimize more + int FieldRangeVector::Iterator::advance( const BSONObj &curr ) { + BSONObjIterator j( curr ); + BSONObjIterator o( _v._keyPattern ); + // track first field for which we are not at the end of the valid values, + // since we may need to advance from the key prefix ending with this field + int latestNonEndpoint = -1; + // iterate over fields to determine appropriate advance method + for( int i = 0; i < (int)_i.size(); ++i ) { + if ( i > 0 && !_v._ranges[ i - 1 ].intervals()[ _i[ i - 1 ] ].equality() ) { + // if last bound was inequality, we don't know anything about where we are for this field + // TODO if possible avoid this in certain cases when the field in the prev key is the same + setMinus( i ); + } + bool eq = false; + BSONElement oo = o.next(); + bool reverse = ( ( oo.number() < 0 ) ^ ( _v._direction < 0 ) ); + BSONElement jj = j.next(); + if ( _i[ i ] == -1 ) { // unknown position for this field, do binary search + int l = _v.matchingLowElement( jj, i, !reverse ); + if ( l % 2 == 0 ) { // we are in a valid range for this field + _i[ i ] = l / 2; + int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ]; + if ( diff > 1 ) { + latestNonEndpoint = i; + } else if ( diff == 1 ) { + int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false ); + if ( x != 0 ) { + latestNonEndpoint = i; + } + } + continue; + } else { // not in a valid range for this field - determine if and how to advance + // check if we're after the last interval for this field + if ( l == (int)_v._ranges[ i ].intervals().size() * 2 - 1 ) { + if ( latestNonEndpoint == -1 ) { + return -2; + } + setZero( latestNonEndpoint + 1 ); + // skip to curr / latestNonEndpoint + 1 / superlative + for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) { + _cmp[ j ] = _superlative[ j ]; + } + return latestNonEndpoint + 1; + } + _i[ i ] = ( l + 1 ) / 2; + // skip to curr / i / nextbounds + _cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound; + for( int j = i + 1; j < (int)_i.size(); ++j ) { + _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound; + } + return i; + } + } + bool first = true; + // _i[ i ] != -1, so we have a starting interval for this field + // which serves as a lower/equal bound on the first iteration - + // we advance from this interval to find a matching interval + while( _i[ i ] < (int)_v._ranges[ i ].intervals().size() ) { + // compare to current interval's upper bound + int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false ); + if ( reverse ) { + x = -x; + } + if ( x == 0 ) { + eq = true; + break; + } + // see if we're less than the upper bound + if ( x > 0 ) { + if ( i == 0 && first ) { + // the value of 1st field won't go backward, so don't check lower bound + // TODO maybe we can check first only?
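// (The leading field cannot go backward here because btree keys are visited in
// index order, so successive keys advance monotonically in their first field.)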
+ break; + } + // if it's an equality interval, don't need to compare separately to lower bound + if ( !_v._ranges[ i ].intervals()[ _i[ i ] ].equality() ) { + // compare to current interval's lower bound + x = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound.woCompare( jj, false ); + if ( reverse ) { + x = -x; + } + } + // if we're less than the lower bound, advance + if ( x > 0 ) { + setZero( i + 1 ); + // skip to curr / i / nextbounds + _cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound; + for( int j = i + 1; j < (int)_i.size(); ++j ) { + _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound; + } + return i; + } else { + break; + } + } + // we're above the upper bound, so try next interval and reset remaining fields + ++_i[ i ]; + setZero( i + 1 ); + first = false; + } + int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ]; + if ( diff > 1 || ( !eq && diff == 1 ) ) { + // check if we're not at the end of valid values for this field + latestNonEndpoint = i; + } else if ( diff == 0 ) { // check if we're past the last interval for this field + if ( latestNonEndpoint == -1 ) { + return -2; + } + // more values possible, skip... + setZero( latestNonEndpoint + 1 ); + // skip to curr / latestNonEndpoint + 1 / superlative + for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) { + _cmp[ j ] = _superlative[ j ]; + } + return latestNonEndpoint + 1; + } + } + return -1; + } + struct SimpleRegexUnitTest : UnitTest { void run(){ { @@ -783,22 +1154,88 @@ } { BSONObjBuilder b; + b.appendRegex("r", "\\Af", ""); + BSONObj o = b.done(); + assert( simpleRegex(o.firstElement()) == "f" ); + } + { + BSONObjBuilder b; b.appendRegex("r", "^f", "m"); BSONObj o = b.done(); + assert( simpleRegex(o.firstElement()) == "" ); + } + { + BSONObjBuilder b; + b.appendRegex("r", "\\Af", "m"); + BSONObj o = b.done(); assert( simpleRegex(o.firstElement()) == "f" ); } { BSONObjBuilder b; - b.appendRegex("r", "^f", "mi"); + b.appendRegex("r", "\\Af", "mi"); BSONObj o = b.done(); assert( simpleRegex(o.firstElement()) == "" ); } { BSONObjBuilder b; - b.appendRegex("r", "^f \t\vo\n\ro \\ \\# #comment", "mx"); + b.appendRegex("r", "\\Af \t\vo\n\ro \\ \\# #comment", "mx"); BSONObj o = b.done(); assert( simpleRegex(o.firstElement()) == "foo #" ); } } } simple_regex_unittest; + + + long long applySkipLimit( long long num , const BSONObj& cmd ){ + BSONElement s = cmd["skip"]; + BSONElement l = cmd["limit"]; + + if ( s.isNumber() ){ + num = num - s.numberLong(); + if ( num < 0 ) { + num = 0; + } + } + + if ( l.isNumber() ){ + long long limit = l.numberLong(); + if ( limit < num ){ + num = limit; + } + } + + return num; + } + + string debugString( Message& m ){ + stringstream ss; + ss << "op: " << opToString( m.operation() ) << " len: " << m.size(); + if ( m.operation() >= 2000 && m.operation() < 2100 ){ + DbMessage d(m); + ss << " ns: " << d.getns(); + switch ( m.operation() ){ + case dbUpdate: { + int flags = d.pullInt(); + BSONObj q = d.nextJsObj(); + ss << " flags: " << flags << " query: " << q; + break; + } + case dbInsert: + ss << d.nextJsObj(); + break; + case dbDelete: { + int flags = d.pullInt(); + BSONObj q = d.nextJsObj(); + ss << " flags: " << flags << " query: " << q; + break; + } + default: + ss << " CANNOT HANDLE YET"; + } + + + } + return ss.str(); + } + } // namespace mongo diff -Nru mongodb-1.4.4/db/queryutil.h mongodb-1.6.3/db/queryutil.h --- mongodb-1.4.4/db/queryutil.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/queryutil.h 2010-09-24 10:02:42.000000000 -0700 
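A quick worked example of the applySkipLimit() helper above; the count and the command document are invented for illustration:

    long long n = applySkipLimit( 100, BSON( "skip" << 10 << "limit" << 50 ) );
    // skip first reduces 100 to 90, then the limit caps it, so n == 50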
@@ -22,26 +22,34 @@ namespace mongo { struct FieldBound { - BSONElement bound_; - bool inclusive_; + BSONElement _bound; + bool _inclusive; bool operator==( const FieldBound &other ) const { - return bound_.woCompare( other.bound_ ) == 0 && - inclusive_ == other.inclusive_; + return _bound.woCompare( other._bound ) == 0 && + _inclusive == other._inclusive; } + void flipInclusive() { _inclusive = !_inclusive; } }; struct FieldInterval { - FieldInterval(){} - FieldInterval( const BSONElement& e ){ - lower_.bound_ = upper_.bound_ = e; - lower_.inclusive_ = upper_.inclusive_ = true; - } - FieldBound lower_; - FieldBound upper_; - bool valid() const { - int cmp = lower_.bound_.woCompare( upper_.bound_, false ); - return ( cmp < 0 || ( cmp == 0 && lower_.inclusive_ && upper_.inclusive_ ) ); + FieldInterval() : _cachedEquality( -1 ) {} + FieldInterval( const BSONElement& e ) : _cachedEquality( -1 ) { + _lower._bound = _upper._bound = e; + _lower._inclusive = _upper._inclusive = true; + } + FieldBound _lower; + FieldBound _upper; + bool strictValid() const { + int cmp = _lower._bound.woCompare( _upper._bound, false ); + return ( cmp < 0 || ( cmp == 0 && _lower._inclusive && _upper._inclusive ) ); } + bool equality() const { + if ( _cachedEquality == -1 ) { + _cachedEquality = ( _lower._inclusive && _upper._inclusive && _lower._bound.woCompare( _upper._bound, false ) == 0 ); + } + return _cachedEquality; + } + mutable int _cachedEquality; }; // range of a field's value that may be determined from query -- used to @@ -50,11 +58,16 @@ public: FieldRange( const BSONElement &e = BSONObj().firstElement() , bool isNot=false , bool optimize=true ); const FieldRange &operator&=( const FieldRange &other ); - const FieldRange &operator|=( const FieldRange &other ); - BSONElement min() const { assert( !empty() ); return intervals_[ 0 ].lower_.bound_; } - BSONElement max() const { assert( !empty() ); return intervals_[ intervals_.size() - 1 ].upper_.bound_; } - bool minInclusive() const { assert( !empty() ); return intervals_[ 0 ].lower_.inclusive_; } - bool maxInclusive() const { assert( !empty() ); return intervals_[ intervals_.size() - 1 ].upper_.inclusive_; } + const FieldRange &operator|=( const FieldRange &other ); + // does not remove fully contained ranges (eg [1,3] - [2,2] doesn't remove anything) + // in future we can change so that an or on $in:[3] combined with $in:{$gt:2} doesn't scan 3 a second time + const FieldRange &operator-=( const FieldRange &other ); + // true iff other includes this + bool operator<=( const FieldRange &other ); + BSONElement min() const { assert( !empty() ); return _intervals[ 0 ]._lower._bound; } + BSONElement max() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._bound; } + bool minInclusive() const { assert( !empty() ); return _intervals[ 0 ]._lower._inclusive; } + bool maxInclusive() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._inclusive; } bool equality() const { return !empty() && @@ -62,20 +75,51 @@ maxInclusive() && minInclusive(); } + bool inQuery() const { + if ( equality() ) { + return true; + } + for( vector< FieldInterval >::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + if ( !i->equality() ) { + return false; + } + } + return true; + } bool nontrivial() const { return ! 
empty() && ( minKey.firstElement().woCompare( min(), false ) != 0 || maxKey.firstElement().woCompare( max(), false ) != 0 ); } - bool empty() const { return intervals_.empty(); } - const vector< FieldInterval > &intervals() const { return intervals_; } + bool empty() const { return _intervals.empty(); } + void makeEmpty() { _intervals.clear(); } + const vector< FieldInterval > &intervals() const { return _intervals; } string getSpecial() const { return _special; } - + void setExclusiveBounds() { + for( vector< FieldInterval >::iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + i->_lower._inclusive = false; + i->_upper._inclusive = false; + } + } + // constructs a range which is the reverse of the current one + // note - the resulting intervals may not be strictValid() + void reverse( FieldRange &ret ) const { + assert( _special.empty() ); + ret._intervals.clear(); + ret._objData = _objData; + for( vector< FieldInterval >::const_reverse_iterator i = _intervals.rbegin(); i != _intervals.rend(); ++i ) { + FieldInterval fi; + fi._lower = i->_upper; + fi._upper = i->_lower; + ret._intervals.push_back( fi ); + } + } private: BSONObj addObj( const BSONObj &o ); - vector< FieldInterval > intervals_; - vector< BSONObj > objData_; + void finishOperation( const vector< FieldInterval > &newIntervals, const FieldRange &other ); + vector< FieldInterval > _intervals; + vector< BSONObj > _objData; string _special; }; @@ -101,10 +145,10 @@ return !operator==( other ); } bool operator<( const QueryPattern &other ) const { - map< string, Type >::const_iterator i = fieldTypes_.begin(); - map< string, Type >::const_iterator j = other.fieldTypes_.begin(); - while( i != fieldTypes_.end() ) { - if ( j == other.fieldTypes_.end() ) + map< string, Type >::const_iterator i = _fieldTypes.begin(); + map< string, Type >::const_iterator j = other._fieldTypes.begin(); + while( i != _fieldTypes.end() ) { + if ( j == other._fieldTypes.end() ) return false; if ( i->first < j->first ) return true; @@ -117,14 +161,14 @@ ++i; ++j; } - if ( j != other.fieldTypes_.end() ) + if ( j != other._fieldTypes.end() ) return true; - return sort_.woCompare( other.sort_ ) < 0; + return _sort.woCompare( other._sort ) < 0; } private: QueryPattern() {} void setSort( const BSONObj sort ) { - sort_ = normalizeSort( sort ); + _sort = normalizeSort( sort ); } BSONObj static normalizeSort( const BSONObj &spec ) { if ( spec.isEmpty() ) @@ -140,73 +184,376 @@ } return b.obj(); } - map< string, Type > fieldTypes_; - BSONObj sort_; + map< string, Type > _fieldTypes; + BSONObj _sort; }; + + // a BoundList contains intervals specified by inclusive start + // and end bounds. The intervals should be nonoverlapping and occur in + // the specified direction of traversal. For example, given a simple index {i:1} + // and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList + // would be valid for index {i:-1} with direction -1. 
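// For instance, a query { i: { $in: [ 1, 4 ] } } against the index { i: 1 } would
// be represented as the point bounds (1, 1); (4, 4), one pair per $in value.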
+ typedef vector< pair< BSONObj, BSONObj > > BoundList; // ranges of fields' value that may be determined from query -- used to // determine index limits class FieldRangeSet { public: + friend class FieldRangeOrSet; + friend class FieldRangeVector; FieldRangeSet( const char *ns, const BSONObj &query , bool optimize=true ); + bool hasRange( const char *fieldName ) const { + map< string, FieldRange >::const_iterator f = _ranges.find( fieldName ); + return f != _ranges.end(); + } const FieldRange &range( const char *fieldName ) const { - map< string, FieldRange >::const_iterator f = ranges_.find( fieldName ); - if ( f == ranges_.end() ) + map< string, FieldRange >::const_iterator f = _ranges.find( fieldName ); + if ( f == _ranges.end() ) + return trivialRange(); + return f->second; + } + FieldRange &range( const char *fieldName ) { + map< string, FieldRange >::iterator f = _ranges.find( fieldName ); + if ( f == _ranges.end() ) return trivialRange(); - return f->second; + return f->second; } int nNontrivialRanges() const { int count = 0; - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) if ( i->second.nontrivial() ) ++count; return count; } - const char *ns() const { return ns_; } - BSONObj query() const { return query_; } + const char *ns() const { return _ns; } // if fields is specified, order fields of returned object to match those of 'fields' BSONObj simplifiedQuery( const BSONObj &fields = BSONObj() ) const; bool matchPossible() const { - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) if ( i->second.empty() ) return false; return true; } QueryPattern pattern( const BSONObj &sort = BSONObj() ) const; - BoundList indexBounds( const BSONObj &keyPattern, int direction ) const; string getSpecial() const; + const FieldRangeSet &operator-=( const FieldRangeSet &other ) { + int nUnincluded = 0; + string unincludedKey; + map< string, FieldRange >::iterator i = _ranges.begin(); + map< string, FieldRange >::const_iterator j = other._ranges.begin(); + while( nUnincluded < 2 && i != _ranges.end() && j != other._ranges.end() ) { + int cmp = i->first.compare( j->first ); + if ( cmp == 0 ) { + if ( i->second <= j->second ) { + // nothing + } else { + ++nUnincluded; + unincludedKey = i->first; + } + ++i; + ++j; + } else if ( cmp < 0 ) { + ++i; + } else { + // other has a bound we don't, nothing can be done + return *this; + } + } + if ( j != other._ranges.end() ) { + // other has a bound we don't, nothing can be done + return *this; + } + if ( nUnincluded > 1 ) { + return *this; + } + if ( nUnincluded == 0 ) { + makeEmpty(); + return *this; + } + // nUnincluded == 1 + _ranges[ unincludedKey ] -= other._ranges[ unincludedKey ]; + appendQueries( other ); + return *this; + } + const FieldRangeSet &operator&=( const FieldRangeSet &other ) { + map< string, FieldRange >::iterator i = _ranges.begin(); + map< string, FieldRange >::const_iterator j = other._ranges.begin(); + while( i != _ranges.end() && j != other._ranges.end() ) { + int cmp = i->first.compare( j->first ); + if ( cmp == 0 ) { + i->second &= j->second; + ++i; + ++j; + } else if ( cmp < 0 ) { + ++i; + } else { + _ranges[ j->first ] = j->second; + ++j; + } + } + while( j != other._ranges.end() ) { + _ranges[ j->first ] = j->second; + ++j; + } + appendQueries( other ); + 
return *this; + } + // TODO get rid of this + BoundList indexBounds( const BSONObj &keyPattern, int direction ) const; private: + void appendQueries( const FieldRangeSet &other ) { + for( vector< BSONObj >::const_iterator i = other._queries.begin(); i != other._queries.end(); ++i ) { + _queries.push_back( *i ); + } + } + void makeEmpty() { + for( map< string, FieldRange >::iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + i->second.makeEmpty(); + } + } + void processQueryField( const BSONElement &e, bool optimize ); void processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ); static FieldRange *trivialRange_; static FieldRange &trivialRange(); - mutable map< string, FieldRange > ranges_; - const char *ns_; - BSONObj query_; + mutable map< string, FieldRange > _ranges; + const char *_ns; + // make sure memory for FieldRange BSONElements is owned + vector< BSONObj > _queries; }; + class FieldRangeVector { + public: + FieldRangeVector( const FieldRangeSet &frs, const BSONObj &keyPattern, int direction ) + :_keyPattern( keyPattern ), _direction( direction >= 0 ? 1 : -1 ) + { + _queries = frs._queries; + BSONObjIterator i( _keyPattern ); + while( i.more() ) { + BSONElement e = i.next(); + int number = (int) e.number(); // returns 0.0 if not numeric + bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 ); + if ( forward ) { + _ranges.push_back( frs.range( e.fieldName() ) ); + } else { + _ranges.push_back( FieldRange() ); + frs.range( e.fieldName() ).reverse( _ranges.back() ); + } + assert( !_ranges.back().empty() ); + } + uassert( 13385, "combinatorial limit of $in partitioning of result set exceeded", size() < 1000000 ); + } + long long size() { + long long ret = 1; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + ret *= i->intervals().size(); + } + return ret; + } + BSONObj startKey() const { + BSONObjBuilder b; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + const FieldInterval &fi = i->intervals().front(); + b.appendAs( fi._lower._bound, "" ); + } + return b.obj(); + } + BSONObj endKey() const { + BSONObjBuilder b; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + const FieldInterval &fi = i->intervals().back(); + b.appendAs( fi._upper._bound, "" ); + } + return b.obj(); + } + BSONObj obj() const { + BSONObjBuilder b; + BSONObjIterator k( _keyPattern ); + for( int i = 0; i < (int)_ranges.size(); ++i ) { + BSONArrayBuilder a( b.subarrayStart( k.next().fieldName() ) ); + for( vector< FieldInterval >::const_iterator j = _ranges[ i ].intervals().begin(); + j != _ranges[ i ].intervals().end(); ++j ) { + a << BSONArray( BSON_ARRAY( j->_lower._bound << j->_upper._bound ).clientReadable() ); + } + a.done(); + } + return b.obj(); + } + bool matches( const BSONObj &obj ) const; + class Iterator { + public: + Iterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _superlative( _v._ranges.size(), 0 ) { + static BSONObj minObj = minObject(); + static BSONElement minElt = minObj.firstElement(); + static BSONObj maxObj = maxObject(); + static BSONElement maxElt = maxObj.firstElement(); + BSONObjIterator i( _v._keyPattern ); + for( int j = 0; j < (int)_superlative.size(); ++j ) { + int number = (int) i.next().number(); + bool forward = ( ( number >= 0 ? 1 : -1 ) * ( _v._direction >= 0 ? 1 : -1 ) > 0 ); + _superlative[ j ] = forward ? 
&maxElt : &minElt;
+                }
+            }
+            static BSONObj minObject() {
+                BSONObjBuilder b;
+                b.appendMinKey( "" );
+                return b.obj();
+            }
+            static BSONObj maxObject() {
+                BSONObjBuilder b;
+                b.appendMaxKey( "" );
+                return b.obj();
+            }
+            bool advance() {
+                int i = _i.size() - 1;
+                while( i >= 0 && _i[ i ] >= ( (int)_v._ranges[ i ].intervals().size() - 1 ) ) {
+                    --i;
+                }
+                if( i >= 0 ) {
+                    _i[ i ]++;
+                    for( unsigned j = i + 1; j < _i.size(); ++j ) {
+                        _i[ j ] = 0;
+                    }
+                } else {
+                    _i[ 0 ] = _v._ranges[ 0 ].intervals().size();
+                }
+                return ok();
+            }
+            // return value
+            // -2 end of iteration
+            // -1 no skipping
+            // >= 0 skip parameter
+            int advance( const BSONObj &curr );
+            const vector< const BSONElement * > &cmp() const { return _cmp; }
+            void setZero( int i ) {
+                for( int j = i; j < (int)_i.size(); ++j ) {
+                    _i[ j ] = 0;
+                }
+            }
+            void setMinus( int i ) {
+                for( int j = i; j < (int)_i.size(); ++j ) {
+                    _i[ j ] = -1;
+                }
+            }
+            bool ok() {
+                return _i[ 0 ] < (int)_v._ranges[ 0 ].intervals().size();
+            }
+            BSONObj startKey() {
+                BSONObjBuilder b;
+                for( int unsigned i = 0; i < _i.size(); ++i ) {
+                    const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ];
+                    b.appendAs( fi._lower._bound, "" );
+                }
+                return b.obj();
+            }
+            // temp
+            BSONObj endKey() {
+                BSONObjBuilder b;
+                for( int unsigned i = 0; i < _i.size(); ++i ) {
+                    const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ];
+                    b.appendAs( fi._upper._bound, "" );
+                }
+                return b.obj();
+            }
+            // check
+        private:
+            const FieldRangeVector &_v;
+            vector< int > _i;
+            vector< const BSONElement* > _cmp;
+            vector< const BSONElement* > _superlative;
+        };
+    private:
+        int matchingLowElement( const BSONElement &e, int i, bool direction ) const;
+        bool matchesElement( const BSONElement &e, int i, bool direction ) const;
+        vector< FieldRange > _ranges;
+        BSONObj _keyPattern;
+        int _direction;
+        vector< BSONObj > _queries; // make sure mem owned
+    };
+
+    // generates FieldRangeSet objects, accounting for or clauses
+    class FieldRangeOrSet {
+    public:
+        FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize=true );
+        // if there's a useless or clause, we won't use or ranges to help with scanning
+        bool orFinished() const { return _orFound && _orSets.empty(); }
+        // removes first or clause, and removes the field ranges it covers from all subsequent or clauses
+        // this could invalidate the result of the last topFrs()
+        void popOrClause() {
+            massert( 13274, "no or clause to pop", !orFinished() );
+            const FieldRangeSet &toPop = _orSets.front();
+            list< FieldRangeSet >::iterator i = _orSets.begin();
+            ++i;
+            while( i != _orSets.end() ) {
+                *i -= toPop;
+                if( !i->matchPossible() ) {
+                    i = _orSets.erase( i );
+                } else {
+                    ++i;
+                }
+            }
+            _oldOrSets.push_front( toPop );
+            _orSets.pop_front();
+        }
+        FieldRangeSet *topFrs() const {
+            FieldRangeSet *ret = new FieldRangeSet( _baseSet );
+            if (_orSets.size()){
+                *ret &= _orSets.front();
+            }
+            return ret;
+        }
+        void allClausesSimplified( vector< BSONObj > &ret ) const {
+            for( list< FieldRangeSet >::const_iterator i = _orSets.begin(); i != _orSets.end(); ++i ) {
+                if ( i->matchPossible() ) {
+                    ret.push_back( i->simplifiedQuery() );
+                }
+            }
+        }
+        string getSpecial() const { return _baseSet.getSpecial(); }
+
+        bool moreOrClauses() const { return !_orSets.empty(); }
+    private:
+        FieldRangeSet _baseSet;
+        list< FieldRangeSet > _orSets;
+        list< FieldRangeSet > _oldOrSets; // make sure memory is owned
+        bool _orFound;
+    };
+
     /** used for doing field limiting */
     class FieldMatcher {
     public:
-
-        FieldMatcher(bool include=false) : _include(include){}
+        FieldMatcher()
+            : _include(true)
+            , _special(false)
+            , _includeID(true)
+            , _skip(0)
+            , _limit(-1)
+        {}
 
         void add( const BSONObj& o );
 
         void append( BSONObjBuilder& b , const BSONElement& e ) const;
 
         BSONObj getSpec() const;
+        bool includeID() { return _includeID; }
 
     private:
 
         void add( const string& field, bool include );
-        void appendArray( BSONObjBuilder& b , const BSONObj& a ) const;
+        void add( const string& field, int skip, int limit );
+        void appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested=false) const;
 
         bool _include; // true if default at this level is to include
+        bool _special; // true if this level can't be skipped or included without recursing
 
         //TODO: benchmark vector vs map
         typedef map< string, boost::shared_ptr< FieldMatcher > > FieldMap;
         FieldMap _fields;
         BSONObj _source;
+        bool _includeID;
+
+        // used for $slice operator
+        int _skip;
+        int _limit;
     };
 
     /** returns a string that when used as a matcher, would match a super set of regex()
@@ -220,4 +567,6 @@
     /** returns the upper bound of a query that matches prefix */
     string simpleRegexEnd( string prefix );
 
+    long long applySkipLimit( long long num , const BSONObj& cmd );
+
 } // namespace mongo
diff -Nru mongodb-1.4.4/db/reccache.cpp mongodb-1.6.3/db/reccache.cpp
--- mongodb-1.4.4/db/reccache.cpp 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/reccache.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -16,15 +16,17 @@
 // storage.cpp
 
-#include "stdafx.h"
+#include "pch.h"
 #include "pdfile.h"
-#include "reccache.h"
+//#include "reccache.h"
 #include "rec.h"
 #include "db.h"
 
+#error deprecated - do not include in project
+
 namespace mongo {
 
-RecCache theRecCache(BucketSize);
+//RecCache theRecCache(BucketSize);
 
 // 100k * 8KB = 800MB
 unsigned RecCache::MAXNODES = 50000;
diff -Nru mongodb-1.4.4/db/reccache.h mongodb-1.6.3/db/reccache.h
--- mongodb-1.4.4/db/reccache.h 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/reccache.h 2010-09-24 10:02:42.000000000 -0700
@@ -32,6 +32,8 @@
 
 #pragma once
 
+#error deprecated
+
 #include "reci.h"
 #include "recstore.h"
 
@@ -212,29 +214,29 @@
 class CachedBasicRecStore : public RecStoreInterface {
 public:
-    virtual char* get(DiskLoc d, unsigned len) {
+    VIRT char* get(DiskLoc d, unsigned len) {
         return theRecCache.get(d, len);
     }
 
-    virtual DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
+    VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
         return theRecCache.insert(ns, obuf, len, god);
     }
 
-    virtual void modified(DiskLoc d) {
+    VIRT void modified(DiskLoc d) {
         theRecCache.dirty(d);
     }
 
     /* drop collection */
-    virtual void drop(const char *ns) {
+    VIRT void drop(const char *ns) {
         theRecCache.drop(ns);
     }
 
-    virtual void rename(const char *fromNs, const char *toNs) {
+    VIRT void rename(const char *fromNs, const char *toNs) {
         massert( 10378 , "rename not yet implemented for CachedBasicRecStore", false );
     }
 
     /* close datafiles associated with the db specified.
*/ - virtual void closeFiles(string dbname, string path) { + VIRT void closeFiles(string dbname, string path) { theRecCache.closeFiles(dbname, dbpath); } }; @@ -244,17 +246,17 @@ call */ -inline void dbunlocking_read() { +//inline void dbunlocking_read() { /* Client *c = currentClient.get(); if ( c ) c->top.clientStop(); */ -} +//} -inline void dbunlocking_write() { - theRecCache.ejectOld(); - dbunlocking_read(); -} +//inline void dbunlocking_write() { + //theRecCache.ejectOld(); +// dbunlocking_read(); +//} } /*namespace*/ diff -Nru mongodb-1.4.4/db/rec.h mongodb-1.6.3/db/rec.h --- mongodb-1.4.4/db/rec.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/rec.h 2010-09-24 10:02:42.000000000 -0700 @@ -28,7 +28,7 @@ #pragma once #include "reci.h" -#include "reccache.h" +//#include "reccache.h" namespace mongo { @@ -41,28 +41,28 @@ class MongoMemMapped_RecStore : public RecStoreInterface { public: - virtual char* get(DiskLoc d, unsigned len) { return d.rec()->data; } + VIRT char* get(DiskLoc d, unsigned len) { return d.rec()->data; } - virtual DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { + VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { return theDataFileMgr.insert(ns, obuf, len, god); } - virtual void deleteRecord(const char *ns, DiskLoc d) { + VIRT void deleteRecord(const char *ns, DiskLoc d) { theDataFileMgr._deleteRecord(nsdetails_notinline(ns), ns, d.rec(), d); } - virtual void modified(DiskLoc d) { } + VIRT void modified(DiskLoc d) { } - virtual void drop(const char *ns) { + VIRT void drop(const char *ns) { dropNS(ns); } - virtual void rename(const char *fromNs, const char *toNs) { + VIRT void rename(const char *fromNs, const char *toNs) { renameNamespace( fromNs, toNs ); } /* close datafiles associated with the db specified. */ - virtual void closeFiles(string dbname, string path) { + VIRT void closeFiles(string dbname, string path) { /* as this is only used for indexes so far, and we are in the same PDFiles as the nonindex data, we just rely on them having been closed at the same time. one day this may need to change. @@ -116,7 +116,9 @@ /* Glue btree to RecStoreInterface: ---------------------------- */ -extern RecStoreInterface *btreeStore; +typedef MongoMemMapped_RecStore StoreToUse; + +extern StoreToUse *btreeStore; const int BucketSize = 8192; diff -Nru mongodb-1.4.4/db/reci.h mongodb-1.6.3/db/reci.h --- mongodb-1.4.4/db/reci.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/reci.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,35 +22,38 @@ namespace mongo { +// #define VIRT virtual +#define VIRT + /* Subclass this and implement your real storage interface. */ class RecStoreInterface { public: - virtual ~RecStoreInterface() {} + //VIRT ~RecStoreInterface() {} /* Get a pointer to the data at diskloc d. Pointer guaranteed to stay in scope through the current database operation's life. */ - virtual char* get(DiskLoc d, unsigned len) = 0; + //VIRT char* get(DiskLoc d, unsigned len) = 0; /* indicate that the diskloc specified has been updated. note that as-is today, the modification may come AFTER this call -- we handle that currently -- until the dblock finishes. 
*/
-    virtual void modified(DiskLoc d) = 0;
+    //VIRT void modified(DiskLoc d) = 0;
 
     /* insert specified data as a record */
-    virtual DiskLoc insert(const char *ns, const void *obuf, int len, bool god) = 0;
+    //VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) = 0;
 
-    virtual void deleteRecord(const char *ns, DiskLoc d) { massert( 10379 , "not implemented RecStoreInterface::deleteRecord", false); }
+    //VIRT void deleteRecord(const char *ns, DiskLoc d) { massert( 10379 , "not implemented RecStoreInterface::deleteRecord", false); }
 
     /* drop the collection */
-    virtual void drop(const char *ns) = 0;
+    //VIRT void drop(const char *ns) = 0;
 
     /* rename collection */
-    virtual void rename(const char *fromNs, const char *toNs) = 0;
+    //VIRT void rename(const char *fromNs, const char *toNs) = 0;
 
     /* close datafiles associated with the db specified. */
-    virtual void closeFiles(string dbname, string path) = 0;
+    //VIRT void closeFiles(string dbname, string path) = 0;
 
     /* todo add:
     closeFiles(dbname)
diff -Nru mongodb-1.4.4/db/recstore.h mongodb-1.6.3/db/recstore.h
--- mongodb-1.4.4/db/recstore.h 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/db/recstore.h 2010-09-24 10:02:42.000000000 -0700
@@ -18,6 +18,8 @@
 
 #pragma once
 
+#error deprecated
+
 #include "../util/file.h"
 
 namespace mongo {
diff -Nru mongodb-1.4.4/db/repl/connections.h mongodb-1.6.3/db/repl/connections.h
--- mongodb-1.4.4/db/repl/connections.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/repl/connections.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,91 @@
+// @file
+
+/*
+ * Copyright (C) 2010 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <map>
+#include "../../client/dbclient.h"
+
+namespace mongo {
+
+    /** here we keep a single connection (with reconnect) for a set of hosts,
+        one each, and allow one user at a time per host. if in use already for that
+        host, we block. so this is an easy way to keep a 1-deep pool of connections
+        that many threads can share.
+
+        thread-safe.
+
+        Example:
+        {
+            ScopedConn c("foo.acme.com:9999");
+            c->runCommand(...);
+        }
+
+        throws exception on connect error (but fine to try again later with a new
+        scopedconn object for same host).
+    */
+    class ScopedConn {
+    public:
+        /** throws assertions if connect failure etc. */
+        ScopedConn(string hostport);
+        ~ScopedConn();
+        DBClientConnection* operator->();
+    private:
+        auto_ptr<scoped_lock> connLock;
+        static mutex mapMutex;
+        struct X {
+            mutex z;
+            DBClientConnection cc;
+            X() : z("X"), cc(/*reconnect*/true, 0, /*timeout*/10) {
+                cc._logLevel = 2;
+            }
+        } *x;
+        typedef map<string,X*> M;
+        static M& _map;
+    };
+
+    inline ScopedConn::ScopedConn(string hostport) {
+        bool first = false;
+        {
+            scoped_lock lk(mapMutex);
+            x = _map[hostport];
+            if( x == 0 ) {
+                x = _map[hostport] = new X();
+                first = true;
+                connLock.reset( new scoped_lock(x->z) );
+            }
+        }
+        if( !first ) {
+            connLock.reset( new scoped_lock(x->z) );
+            return;
+        }
+
+        // we already locked above...
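[Editor's note, not part of this patch: only the thread that just created a fresh X entry reaches this point. mapMutex is held just long enough to find or create the per-host slot, while the per-host mutex x->z, taken through connLock, stays held for the ScopedConn's lifetime and is what serializes users of the one cached connection; the creating thread then performs the initial connect below.]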
+ string err; + x->cc.connect(hostport, err); + } + + inline ScopedConn::~ScopedConn() { + // conLock releases... + } + + inline DBClientConnection* ScopedConn::operator->() { + return &x->cc; + } + +} diff -Nru mongodb-1.4.4/db/repl/consensus.cpp mongodb-1.6.3/db/repl/consensus.cpp --- mongodb-1.4.4/db/repl/consensus.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/consensus.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,354 @@ +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../commands.h" +#include "rs.h" +#include "multicmd.h" + +namespace mongo { + + class CmdReplSetFresh : public ReplSetCommand { + public: + CmdReplSetFresh() : ReplSetCommand("replSetFresh") { } + private: + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( !check(errmsg, result) ) + return false; + + if( cmdObj["set"].String() != theReplSet->name() ) { + errmsg = "wrong repl set name"; + return false; + } + string who = cmdObj["who"].String(); + int cfgver = cmdObj["cfgver"].Int(); + OpTime opTime(cmdObj["opTime"].Date()); + + bool weAreFresher = false; + if( theReplSet->config().version > cfgver ) { + log() << "replSet member " << who << " is not yet aware its cfg version " << cfgver << " is stale" << rsLog; + result.append("info", "config version stale"); + weAreFresher = true; + } + else if( opTime < theReplSet->lastOpTimeWritten ) { + weAreFresher = true; + } + result.appendDate("opTime", theReplSet->lastOpTimeWritten.asDate()); + result.append("fresher", weAreFresher); + return true; + } + } cmdReplSetFresh; + + class CmdReplSetElect : public ReplSetCommand { + public: + CmdReplSetElect() : ReplSetCommand("replSetElect") { } + private: + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( !check(errmsg, result) ) + return false; + //task::lam f = boost::bind(&Consensus::electCmdReceived, &theReplSet->elect, cmdObj, &result); + //theReplSet->mgr->call(f); + theReplSet->elect.electCmdReceived(cmdObj, &result); + return true; + } + } cmdReplSetElect; + + int Consensus::totalVotes() const { + static int complain = 0; + int vTot = rs._self->config().votes; + for( Member *m = rs.head(); m; m=m->next() ) + vTot += m->config().votes; + if( vTot % 2 == 0 && vTot && complain++ == 0 ) + log() << "replSet warning total number of votes is even - considering giving one member an extra vote" << rsLog; + return vTot; + } + + bool Consensus::aMajoritySeemsToBeUp() const { + int vUp = rs._self->config().votes; + for( Member *m = rs.head(); m; m=m->next() ) + vUp += m->hbinfo().up() ? 
m->config().votes : 0; + return vUp * 2 > totalVotes(); + } + + bool Consensus::shouldRelinquish() const { + int vUp = rs._self->config().votes; + const long long T = rs.config().ho.heartbeatTimeoutMillis * rs.config().ho.heartbeatConnRetries; + for( Member *m = rs.head(); m; m=m->next() ) { + long long dt = m->hbinfo().timeDown(); + if( dt < T ) + vUp += m->config().votes; + } + return !( vUp * 2 > totalVotes() ); + } + + static const int VETO = -10000; + + const time_t LeaseTime = 30; + + unsigned Consensus::yea(unsigned memberId) /* throws VoteException */ { + Atomic::tran t(ly); + LastYea &ly = t.ref(); + time_t now = time(0); + if( ly.when + LeaseTime >= now && ly.who != memberId ) { + log(1) << "replSet not voting yea for " << memberId << + " voted for " << ly.who << ' ' << now-ly.when << " secs ago" << rsLog; + throw VoteException(); + } + ly.when = now; + ly.who = memberId; + return rs._self->config().votes; + } + + /* we vote for ourself at start of election. once it fails, we can cancel the lease we had in + place instead of leaving it for a long time. + */ + void Consensus::electionFailed(unsigned meid) { + Atomic::tran t(ly); + LastYea &L = t.ref(); + DEV assert( L.who == meid ); // this may not always always hold, so be aware, but adding for now as a quick sanity test + if( L.who == meid ) + L.when = 0; + } + + /* todo: threading **************** !!!!!!!!!!!!!!!! */ + void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { + BSONObjBuilder& b = *_b; + DEV log() << "replSet received elect msg " << cmd.toString() << rsLog; + else log(2) << "replSet received elect msg " << cmd.toString() << rsLog; + string set = cmd["set"].String(); + unsigned whoid = cmd["whoid"].Int(); + int cfgver = cmd["cfgver"].Int(); + OID round = cmd["round"].OID(); + int myver = rs.config().version; + + int vote = 0; + if( set != rs.name() ) { + log() << "replSet error received an elect request for '" << set << "' but our set name is '" << rs.name() << "'" << rsLog; + + } + else if( myver < cfgver ) { + // we are stale. don't vote + } + else if( myver > cfgver ) { + // they are stale! + log() << "replSet info got stale version # during election" << rsLog; + vote = -10000; + } + else { + try { + vote = yea(whoid); + rs.relinquish(); + log() << "replSet info voting yea for " << whoid << rsLog; + } + catch(VoteException&) { + log() << "replSet voting no already voted for another" << rsLog; + } + } + + b.append("vote", vote); + b.append("round", round); + } + + void ReplSetImpl::_getTargets(list& L, int& configVersion) { + configVersion = config().version; + for( Member *m = head(); m; m=m->next() ) + if( m->hbinfo().up() ) + L.push_back( Target(m->fullName()) ); + } + + /* config version is returned as it is ok to use this unlocked. BUT, if unlocked, you would need + to check later that the config didn't change. */ + void ReplSetImpl::getTargets(list& L, int& configVersion) { + if( lockedByMe() ) { + _getTargets(L, configVersion); + return; + } + lock lk(this); + _getTargets(L, configVersion); + } + + /* Do we have the newest data of them all? + @param allUp - set to true if all members are up. Only set if true returned. + @return true if we are freshest. Note we may tie. 
+ */ + bool Consensus::weAreFreshest(bool& allUp, int& nTies) { + const OpTime ord = theReplSet->lastOpTimeWritten; + nTies = 0; + assert( !ord.isNull() ); + BSONObj cmd = BSON( + "replSetFresh" << 1 << + "set" << rs.name() << + "opTime" << Date_t(ord.asDate()) << + "who" << rs._self->fullName() << + "cfgver" << rs._cfg->version ); + list L; + int ver; + rs.getTargets(L, ver); + multiCommand(cmd, L); + int nok = 0; + allUp = true; + for( list::iterator i = L.begin(); i != L.end(); i++ ) { + if( i->ok ) { + nok++; + if( i->result["fresher"].trueValue() ) + return false; + OpTime remoteOrd( i->result["opTime"].Date() ); + if( remoteOrd == ord ) + nTies++; + assert( remoteOrd <= ord ); + } + else { + DEV log() << "replSet freshest returns " << i->result.toString() << rsLog; + allUp = false; + } + } + DEV log() << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; + assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working... + return true; + } + + extern time_t started; + + void Consensus::multiCommand(BSONObj cmd, list& L) { + assert( !rs.lockedByMe() ); + mongo::multiCommand(cmd, L); + } + + void Consensus::_electSelf() { + if( time(0) < steppedDown ) + return; + + { + const OpTime ord = theReplSet->lastOpTimeWritten; + if( ord == 0 ) { + log() << "replSet info not trying to elect self, do not yet have a complete set of data from any point in time" << rsLog; + return; + } + } + + bool allUp; + int nTies; + if( !weAreFreshest(allUp, nTies) ) { + log() << "replSet info not electing self, we are not freshest" << rsLog; + return; + } + + rs.sethbmsg("",9); + + if( !allUp && time(0) - started < 60 * 5 ) { + /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data + if we don't have to -- we'd rather be offline and wait a little longer instead + todo: make this configurable. + */ + rs.sethbmsg("not electing self, not all members up and we have been up less than 5 minutes"); + return; + } + + Member& me = *rs._self; + + if( nTies ) { + /* tie? we then randomly sleep to try to not collide on our voting. */ + /* todo: smarter. 
*/ + if( me.id() == 0 || sleptLast ) { + // would be fine for one node not to sleep + // todo: biggest / highest priority nodes should be the ones that get to not sleep + } else { + assert( !rs.lockedByMe() ); // bad to go to sleep locked + unsigned ms = ((unsigned) rand()) % 1000 + 50; + DEV log() << "replSet tie " << nTies << " sleeping a little " << ms << "ms" << rsLog; + sleptLast = true; + sleepmillis(ms); + throw RetryAfterSleepException(); + } + } + sleptLast = false; + + time_t start = time(0); + unsigned meid = me.id(); + int tally = yea( meid ); + bool success = false; + try { + log() << "replSet info electSelf " << meid << rsLog; + + BSONObj electCmd = BSON( + "replSetElect" << 1 << + "set" << rs.name() << + "who" << me.fullName() << + "whoid" << me.hbinfo().id() << + "cfgver" << rs._cfg->version << + "round" << OID::gen() /* this is just for diagnostics */ + ); + + int configVersion; + list L; + rs.getTargets(L, configVersion); + multiCommand(electCmd, L); + + { + RSBase::lock lk(&rs); + for( list::iterator i = L.begin(); i != L.end(); i++ ) { + DEV log() << "replSet elect res: " << i->result.toString() << rsLog; + if( i->ok ) { + int v = i->result["vote"].Int(); + tally += v; + } + } + if( tally*2 <= totalVotes() ) { + log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog; + } + else if( time(0) - start > 30 ) { + // defensive; should never happen as we have timeouts on connection and operation for our conn + log() << "replSet too much time passed during our election, ignoring result" << rsLog; + } + else if( configVersion != rs.config().version ) { + log() << "replSet config version changed during our election, ignoring result" << rsLog; + } + else { + /* succeeded. */ + log(1) << "replSet election succeeded, assuming primary role" << rsLog; + success = true; + rs.assumePrimary(); + } + } + } catch( std::exception& ) { + if( !success ) electionFailed(meid); + throw; + } + if( !success ) electionFailed(meid); + } + + void Consensus::electSelf() { + assert( !rs.lockedByMe() ); + assert( !rs.myConfig().arbiterOnly ); + assert( rs.myConfig().slaveDelay == 0 ); + try { + _electSelf(); + } + catch(RetryAfterSleepException&) { + throw; + } + catch(VoteException& ) { + log() << "replSet not trying to elect self as responded yea to someone else recently" << rsLog; + } + catch(DBException& e) { + log() << "replSet warning caught unexpected exception in electSelf() " << e.toString() << rsLog; + } + catch(...) { + log() << "replSet warning caught unexpected exception in electSelf()" << rsLog; + } + } + +} diff -Nru mongodb-1.4.4/db/repl/health.cpp mongodb-1.6.3/db/repl/health.cpp --- mongodb-1.4.4/db/repl/health.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/health.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,393 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#include "pch.h" +#include "rs.h" +#include "health.h" +#include "../../util/background.h" +#include "../../client/dbclient.h" +#include "../commands.h" +#include "../../util/concurrency/value.h" +#include "../../util/concurrency/task.h" +#include "../../util/mongoutils/html.h" +#include "../../util/goodies.h" +#include "../../util/ramlog.h" +#include "../helpers/dblogger.h" +#include "connections.h" +#include "../../util/unittest.h" +#include "../dbhelpers.h" + +namespace mongo { + /* decls for connections.h */ + ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M()); + mutex ScopedConn::mapMutex("ScopedConn::mapMutex"); +} + +namespace mongo { + + using namespace mongoutils::html; + using namespace bson; + + static RamLog _rsLog; + Tee *rsLog = &_rsLog; + + string ago(time_t t) { + if( t == 0 ) return ""; + + time_t x = time(0) - t; + stringstream s; + if( x < 180 ) { + s << x << " sec"; + if( x != 1 ) s << 's'; + } + else if( x < 3600 ) { + s.precision(2); + s << x / 60.0 << " mins"; + } + else { + s.precision(2); + s << x / 3600.0 << " hrs"; + } + return s.str(); + } + + void Member::summarizeMember(stringstream& s) const { + s << tr(); + { + stringstream u; + u << "http://" << h().host() << ':' << (h().port() + 1000) << "/_replSet"; + s << td( a(u.str(), "", fullName()) ); + } + s << td( id() ); + double h = hbinfo().health; + bool ok = h > 0; + s << td(red(str::stream() << h,h == 0)); + s << td(ago(hbinfo().upSince)); + bool never = false; + { + string h; + time_t hb = hbinfo().lastHeartbeat; + if( hb == 0 ) { + h = "never"; + never = true; + } + else h = ago(hb) + " ago"; + s << td(h); + } + s << td(config().votes); + { + string stateText = state().toString(); + if( _config.hidden ) + stateText += " (hidden)"; + if( ok || stateText.empty() ) + s << td(stateText); // text blank if we've never connected + else + s << td( grey(str::stream() << "(was " << state().toString() << ')', true) ); + } + s << td( grey(hbinfo().lastHeartbeatMsg,!ok) ); + stringstream q; + q << "/_replSetOplog?" << id(); + s << td( a(q.str(), "", never ? "?" : hbinfo().opTime.toString()) ); + if( hbinfo().skew > INT_MIN ) { + s << td( grey(str::stream() << hbinfo().skew,!ok) ); + } else + s << td(""); + s << _tr(); + } + + string ReplSetImpl::stateAsHtml(MemberState s) { + if( s.s == MemberState::RS_STARTUP ) return a("", "serving still starting up, or still trying to initiate the set", "STARTUP"); + if( s.s == MemberState::RS_PRIMARY ) return a("", "this server thinks it is primary", "PRIMARY"); + if( s.s == MemberState::RS_SECONDARY ) return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY"); + if( s.s == MemberState::RS_RECOVERING ) return a("", "recovering/resyncing; after recovery usually auto-transitions to secondary", "RECOVERING"); + if( s.s == MemberState::RS_FATAL ) return a("", "something bad has occurred and server is not completely offline with regard to the replica set. 
fatal error.", "FATAL");
+        if( s.s == MemberState::RS_STARTUP2 ) return a("", "loaded config, still determining who is primary", "STARTUP2");
+        if( s.s == MemberState::RS_ARBITER ) return a("", "this server is an arbiter only", "ARBITER");
+        if( s.s == MemberState::RS_DOWN ) return a("", "member is down, slow, or unreachable", "DOWN");
+        if( s.s == MemberState::RS_ROLLBACK ) return a("", "rolling back operations to get in sync", "ROLLBACK");
+        return "";
+    }
+
+    string MemberState::toString() const {
+        if( s == MemberState::RS_STARTUP ) return "STARTUP";
+        if( s == MemberState::RS_PRIMARY ) return "PRIMARY";
+        if( s == MemberState::RS_SECONDARY ) return "SECONDARY";
+        if( s == MemberState::RS_RECOVERING ) return "RECOVERING";
+        if( s == MemberState::RS_FATAL ) return "FATAL";
+        if( s == MemberState::RS_STARTUP2 ) return "STARTUP2";
+        if( s == MemberState::RS_ARBITER ) return "ARBITER";
+        if( s == MemberState::RS_DOWN ) return "DOWN";
+        if( s == MemberState::RS_ROLLBACK ) return "ROLLBACK";
+        return "";
+    }
+
+    extern time_t started;
+
+    // oplogdiags in web ui
+    static void say(stringstream&ss, const bo& op) {
+        ss << "<tr>";
+
+        set<string> skip;
+        be e = op["ts"];
+        if( e.type() == Date || e.type() == Timestamp ) {
+            OpTime ot = e._opTime();
+            ss << td( time_t_to_String_short( ot.getSecs() ) );
+            ss << td( ot.toString() );
+            skip.insert("ts");
+        }
+        else ss << td("?") << td("?");
+
+        e = op["h"];
+        if( e.type() == NumberLong ) {
+            ss << "<td>" << hex << e.Long() << "</td>\n";
+            skip.insert("h");
+        } else
+            ss << td("?");
+
+        ss << td(op["op"].valuestrsafe());
+        ss << td(op["ns"].valuestrsafe());
+        skip.insert("op");
+        skip.insert("ns");
+
+        ss << "<td>";
+        for( bo::iterator i(op); i.more(); ) {
+            be e = i.next();
+            if( skip.count(e.fieldName()) ) continue;
+            ss << e.toString() << ' ';
+        }
+        ss << "</td>";
+
+        ss << "</tr>";
+        ss << '\n';
+    }
+
+    void ReplSetImpl::_getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const {
+        const Member *m = findById(server_id);
+        if( m == 0 ) {
+            ss << "Error : can't find a member with id: " << server_id << '\n';
+            return;
+        }
+
+        ss << p("Server : " + m->fullName() + "<br>ns : " + rsoplog );
+
+        //const bo fields = BSON( "o" << false << "o2" << false );
+        const bo fields;
+
+        ScopedConn conn(m->fullName());
+
+        auto_ptr<DBClientCursor> c = conn->query(rsoplog, Query().sort("$natural",1), 20, 0, &fields);
+        if( c.get() == 0 ) {
+            ss << "couldn't query " << rsoplog;
+            return;
+        }
+        static const char *h[] = {"ts","optime", "h","op","ns","rest",0};
+
+        ss << "<style type=\"text/css\" media=\"screen\">"
+            "table { font-size:75% }\n"
+            "</style>\n";
+
+        ss << table(h, true);
+        //ss << "<pre>\n";
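[Editor's note, not part of this patch: the loop below renders the first 20 oplog entries oldest-first; n counts the rows while otFirst and otLast record the optime range already shown, so the reverse scan further down can stop before repeating rows.]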
+        int n = 0;
+        OpTime otFirst;
+        OpTime otLast;
+        OpTime otEnd;
+        while( c->more() ) {
+            bo o = c->next();
+            otLast = o["ts"]._opTime();
+            if( otFirst.isNull() ) 
+                otFirst = otLast;
+            say(ss, o);
+            n++;            
+        }
+        if( n == 0 ) {
+            ss << rsoplog << " is empty\n";
+        }
+        else { 
+            auto_ptr<DBClientCursor> c = conn->query(rsoplog, Query().sort("$natural",-1), 20, 0, &fields);
+            if( c.get() == 0 ) { 
+                ss << "couldn't query [2] " << rsoplog;
+                return;
+            }
+            string x;
+            bo o = c->next();
+            otEnd = o["ts"]._opTime();
+            while( 1 ) {
+                stringstream z;
+                if( o["ts"]._opTime() == otLast ) 
+                    break;
+                say(z, o);
+                x = z.str() + x;
+                if( !c->more() )
+                    break;
+                o = c->next();
+            }
+            if( !x.empty() ) {
+                ss << "...............\n" << x;
+                //ss << "\n...\n\n" << x;
+            }
+        }
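[Editor's note, not part of this patch: the else branch above walked the oplog tail newest-first, prepending each row to x and stopping on reaching otLast, so the dotted divider marks the elided middle between the oldest and newest twenty entries.]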
+        ss << _table();
+        ss << p(time_t_to_String_short(time(0)) + " current time");
+
+        //ss << "</pre>\n";
+
+        if( !otEnd.isNull() ) {
+            ss << "<p>Log length in time: ";
+            unsigned d = otEnd.getSecs() - otFirst.getSecs();
+            double h = d / 3600.0;
+            ss.precision(3);
+            if( h < 72 )
+                ss << h << " hours";
+            else
+                ss << h / 24.0 << " days";
+            ss << "</p>
\n"; + } + + } + + void ReplSetImpl::_summarizeAsHtml(stringstream& s) const { + s << table(0, false); + s << tr("Set name:", _name); + s << tr("Majority up:", elect.aMajoritySeemsToBeUp()?"yes":"no" ); + s << _table(); + + const char *h[] = {"Member", + "id", + "Up", + "cctime", + "Last heartbeat", + "Votes", "State", "Status", + "optime", + "skew", + 0}; + s << table(h); + + /* this is to sort the member rows by their ordinal _id, so they show up in the same + order on all the different web ui's; that is less confusing for the operator. */ + map mp; + + string myMinValid; + try { + readlocktry lk("local.replset.minvalid", 300); + if( lk.got() ) { + BSONObj mv; + if( Helpers::getSingleton("local.replset.minvalid", mv) ) { + myMinValid = "minvalid:" + mv["ts"]._opTime().toString(); + } + } + else myMinValid = "."; + } + catch(...) { + myMinValid = "exception fetching minvalid"; + } + + { + stringstream s; + /* self row */ + s << tr() << td(_self->fullName() + " (me)") << + td(_self->id()) << + td("1") << //up + td(ago(started)) << + td("") << // last heartbeat + td(ToString(_self->config().votes)) << + td( stateAsHtml(box.getState()) + (_self->config().hidden?" (hidden)":"") ); + s << td( _hbmsg ); + stringstream q; + q << "/_replSetOplog?" << _self->id(); + s << td( a(q.str(), myMinValid, theReplSet->lastOpTimeWritten.toString()) ); + s << td(""); // skew + s << _tr(); + mp[_self->hbinfo().id()] = s.str(); + } + Member *m = head(); + while( m ) { + stringstream s; + m->summarizeMember(s); + mp[m->hbinfo().id()] = s.str(); + m = m->next(); + } + + for( map::const_iterator i = mp.begin(); i != mp.end(); i++ ) + s << i->second; + s << _table(); + } + + + void fillRsLog(stringstream& s) { + _rsLog.toHTML( s ); + } + + const Member* ReplSetImpl::findById(unsigned id) const { + if( id == _self->id() ) return _self; + for( Member *m = head(); m; m = m->next() ) + if( m->id() == id ) + return m; + return 0; + } + + void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const { + vector v; + + // add self + { + HostAndPort h(getHostName(), cmdLine.port); + + BSONObjBuilder bb; + bb.append("_id", (int) _self->id()); + bb.append("name", h.toString()); + bb.append("health", 1.0); + bb.append("state", (int) box.getState().s); + string s = _self->lhb(); + if( !s.empty() ) + bb.append("errmsg", s); + bb.append("self", true); + v.push_back(bb.obj()); + } + + Member *m =_members.head(); + while( m ) { + BSONObjBuilder bb; + bb.append("_id", (int) m->id()); + bb.append("name", m->fullName()); + bb.append("health", m->hbinfo().health); + bb.append("state", (int) m->state().s); + bb.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0)); + bb.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat); + string s = m->lhb(); + if( !s.empty() ) + bb.append("errmsg", s); + v.push_back(bb.obj()); + m = m->next(); + } + sort(v.begin(), v.end()); + b.append("set", name()); + b.appendTimeT("date", time(0)); + b.append("myState", box.getState().s); + b.append("members", v); + } + + static struct Test : public UnitTest { + void run() { + HealthOptions a,b; + assert( a == b ); + assert( a.isDefault() ); + } + } test; + +} diff -Nru mongodb-1.4.4/db/repl/health.h mongodb-1.6.3/db/repl/health.h --- mongodb-1.4.4/db/repl/health.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/health.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,50 @@ +// replset.h + +/** +* Copyright (C) 2008 10gen Inc. 
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+namespace mongo {
+
+    /* throws */
+    bool requestHeartbeat(string setname, string fromHost, string memberFullName, BSONObj& result, int myConfigVersion, int& theirConfigVersion, bool checkEmpty = false);
+
+    struct HealthOptions {
+        HealthOptions() {
+            heartbeatSleepMillis = 2000;
+            heartbeatTimeoutMillis = 10000;
+            heartbeatConnRetries = 2;
+        }
+
+        bool isDefault() const { return *this == HealthOptions(); }
+
+        // see http://www.mongodb.org/display/DOCS/Replica+Set+Internals
+        unsigned heartbeatSleepMillis;
+        unsigned heartbeatTimeoutMillis;
+        unsigned heartbeatConnRetries ;
+
+        void check() {
+            uassert(13112, "bad replset heartbeat option", heartbeatSleepMillis >= 10);
+            uassert(13113, "bad replset heartbeat option", heartbeatTimeoutMillis >= 10);
+        }
+
+        bool operator==(const HealthOptions& r) const {
+            return heartbeatSleepMillis==r.heartbeatSleepMillis && heartbeatTimeoutMillis==r.heartbeatTimeoutMillis && heartbeatConnRetries==r.heartbeatConnRetries;
+        }
+    };
+
+}
diff -Nru mongodb-1.4.4/db/repl/heartbeat.cpp mongodb-1.6.3/db/repl/heartbeat.cpp
--- mongodb-1.4.4/db/repl/heartbeat.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/repl/heartbeat.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,278 @@
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include "pch.h" +#include "rs.h" +#include "health.h" +#include "../../util/background.h" +#include "../../client/dbclient.h" +#include "../commands.h" +#include "../../util/concurrency/value.h" +#include "../../util/concurrency/task.h" +#include "../../util/concurrency/msg.h" +#include "../../util/mongoutils/html.h" +#include "../../util/goodies.h" +#include "../../util/ramlog.h" +#include "../helpers/dblogger.h" +#include "connections.h" +#include "../../util/unittest.h" +#include "../instance.h" + +namespace mongo { + + using namespace bson; + + extern bool replSetBlind; + + // hacky + string *discoveredSeed = 0; + + long long HeartbeatInfo::timeDown() const { + if( up() ) return 0; + if( downSince == 0 ) + return 0; // still waiting on first heartbeat + return jsTime() - downSince; + } + + /* { replSetHeartbeat : } */ + class CmdReplSetHeartbeat : public ReplSetCommand { + public: + virtual bool adminOnly() const { return false; } + CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( replSetBlind ) + return false; + + /* we don't call ReplSetCommand::check() here because heartbeat + checks many things that are pre-initialization. */ + if( !replSet ) { + errmsg = "not running with --replSet"; + return false; + } + + /* we want to keep heartbeat connections open when relinquishing primary. tag them here. */ + { + MessagingPort *mp = cc()._mp; + if( mp ) + mp->tag |= 1; + } + + if( cmdObj["pv"].Int() != 1 ) { + errmsg = "incompatible replset protocol version"; + return false; + } + { + string s = string(cmdObj.getStringField("replSetHeartbeat")); + if( cmdLine.ourSetName() != s ) { + errmsg = "repl set names do not match"; + log() << "cmdline: " << cmdLine._replSet << endl; + log() << "s: " << s << endl; + result.append("mismatch", true); + return false; + } + } + + result.append("rs", true); + if( cmdObj["checkEmpty"].trueValue() ) { + result.append("hasData", replHasDatabases()); + } + if( theReplSet == 0 ) { + string from( cmdObj.getStringField("from") ); + if( !from.empty() && discoveredSeed == 0 ) { + discoveredSeed = new string(from); + } + errmsg = "still initializing"; + return false; + } + + if( theReplSet->name() != cmdObj.getStringField("replSetHeartbeat") ) { + errmsg = "repl set names do not match (2)"; + result.append("mismatch", true); + return false; + } + result.append("set", theReplSet->name()); + result.append("state", theReplSet->state().s); + result.append("hbmsg", theReplSet->hbmsg()); + result.append("time", (long long) time(0)); + result.appendDate("opTime", theReplSet->lastOpTimeWritten.asDate()); + int v = theReplSet->config().version; + result.append("v", v); + if( v > cmdObj["v"].Int() ) + result << "config" << theReplSet->config().asBson(); + + return true; + } + } cmdReplSetHeartbeat; + + /* throws dbexception */ + bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, int myCfgVersion, int& theirCfgVersion, bool checkEmpty) { + if( replSetBlind ) { + //sleepmillis( rand() ); + return false; + } + + BSONObj cmd = BSON( "replSetHeartbeat" << setName << "v" << myCfgVersion << "pv" << 1 << "checkEmpty" << checkEmpty << "from" << from ); + + // we might be talking to ourself - generally not a great idea to do outbound waiting calls in a write lock + assert( !dbMutex.isWriteLocked() ); + + // these are slow (multisecond to respond), so generally we don't want to be locked, at least not without + 
// thinking carefully about it first.
+        assert( theReplSet == 0 || !theReplSet->lockedByMe() );
+
+        ScopedConn conn(memberFullName);
+        return conn->runCommand("admin", cmd, result);
+    }
+
+    /* poll every other set member to check its status */
+    class ReplSetHealthPollTask : public task::Task {
+        HostAndPort h;
+        HeartbeatInfo m;
+    public:
+        ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm) : h(hh), m(mm) { }
+
+        string name() { return "ReplSetHealthPollTask"; }
+        void doWork() {
+            if ( !theReplSet ) {
+                log(2) << "theReplSet not initialized yet, skipping health poll this round" << rsLog;
+                return;
+            }
+
+            HeartbeatInfo mem = m;
+            HeartbeatInfo old = mem;
+            try {
+                BSONObj info;
+                int theirConfigVersion = -10000;
+
+                time_t before = time(0);
+
+                bool ok = requestHeartbeat(theReplSet->name(), theReplSet->selfFullName(), h.toString(), info, theReplSet->config().version, theirConfigVersion);
+
+                time_t after = mem.lastHeartbeat = time(0); // we set this on any response - we don't get this far if couldn't connect because exception is thrown
+
+                try {
+                    mem.skew = 0;
+                    long long t = info["time"].Long();
+                    if( t > after )
+                        mem.skew = (int) (t - after);
+                    else if( t < before )
+                        mem.skew = (int) (t - before); // negative
+                }
+                catch(...) {
+                    mem.skew = INT_MIN;
+                }
+
+                {
+                    be state = info["state"];
+                    if( state.ok() )
+                        mem.hbstate = MemberState(state.Int());
+                }
+                if( ok ) {
+                    if( mem.upSince == 0 ) {
+                        log() << "replSet info " << h.toString() << " is now up" << rsLog;
+                        mem.upSince = mem.lastHeartbeat;
+                    }
+                    mem.health = 1.0;
+                    mem.lastHeartbeatMsg = info["hbmsg"].String();
+                    if( info.hasElement("opTime") )
+                        mem.opTime = info["opTime"].Date();
+
+                    be cfg = info["config"];
+                    if( cfg.ok() ) {
+                        // received a new config
+                        boost::function<void()> f =
+                            boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy());
+                        theReplSet->mgr->send(f);
+                    }
+                }
+                else {
+                    down(mem, info.getStringField("errmsg"));
+                }
+            }
+            catch(...) {
+                down(mem, "connect/transport error");
+            }
+            m = mem;
+
+            theReplSet->mgr->send( boost::bind(&ReplSet::msgUpdateHBInfo, theReplSet, mem) );
+
+            static time_t last = 0;
+            time_t now = time(0);
+            bool changed = mem.changed(old);
+            if( changed ) {
+                if( old.hbstate != mem.hbstate )
+                    log() << "replSet " << h.toString() << ' ' << mem.hbstate.toString() << rsLog;
+            }
+            if( changed || now-last>4 ) {
+                last = now;
+                theReplSet->mgr->send( boost::bind(&Manager::msgCheckNewState, theReplSet->mgr) );
+            }
+        }
+
+    private:
+        void down(HeartbeatInfo& mem, string msg) {
+            mem.health = 0.0;
+            if( mem.upSince || mem.downSince == 0 ) {
+                mem.upSince = 0;
+                mem.downSince = jsTime();
+                log() << "replSet info " << h.toString() << " is now down (or slow to respond)" << rsLog;
+            }
+            mem.lastHeartbeatMsg = msg;
+        }
+    };
+
+    void ReplSetImpl::endOldHealthTasks() {
+        unsigned sz = healthTasks.size();
+        for( set<ReplSetHealthPollTask*>::iterator i = healthTasks.begin(); i != healthTasks.end(); i++ )
+            (*i)->halt();
+        healthTasks.clear();
+        if( sz )
+            DEV log() << "replSet debug: cleared old tasks " << sz << endl;
+    }
+
+    void ReplSetImpl::startHealthTaskFor(Member *m) {
+        ReplSetHealthPollTask *task = new ReplSetHealthPollTask(m->h(), m->hbinfo());
+        healthTasks.insert(task);
+        task::repeat(task, 2000);
+    }
+
+    void startSyncThread();
+
+    /** called during repl set startup. caller expects it to return fairly quickly.
+        note ReplSet object is only created once we get a config - so this won't run
+        until the initiation.
+ */ + void ReplSetImpl::startThreads() { + task::fork(mgr); + + /*Member* m = _members.head(); + while( m ) { + ReplSetHealthPollTask *task = new ReplSetHealthPollTask(m->h(), m->hbinfo()); + healthTasks.insert(task); + task::repeat(shared_ptr(task), 2000); + m = m->next(); + }*/ + + mgr->send( boost::bind(&Manager::msgCheckNewState, theReplSet->mgr) ); + + boost::thread t(startSyncThread); + } + +} + +/* todo: + stop bg job and delete on removefromset +*/ diff -Nru mongodb-1.4.4/db/repl/manager.cpp mongodb-1.6.3/db/repl/manager.cpp --- mongodb-1.4.4/db/repl/manager.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/manager.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,191 @@ +/* @file manager.cpp +*/ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "rs.h" +#include "../client.h" + +namespace mongo { + + enum { + NOPRIMARY = -2, + SELFPRIMARY = -1 + }; + + /* check members OTHER THAN US to see if they think they are primary */ + const Member * Manager::findOtherPrimary(bool& two) { + two = false; + Member *m = rs->head(); + Member *p = 0; + while( m ) { + DEV assert( m != rs->_self ); + if( m->state().primary() && m->hbinfo().up() ) { + if( p ) { + two = true; + return 0; + } + p = m; + } + m = m->next(); + } + if( p ) + noteARemoteIsPrimary(p); + return p; + } + + Manager::Manager(ReplSetImpl *_rs) : + task::Server("rs Manager"), rs(_rs), busyWithElectSelf(false), _primary(NOPRIMARY) + { + } + + Manager::~Manager() { + log() << "ERROR: ~Manager should never be called" << rsLog; + rs->mgr = 0; + assert(false); + } + + void Manager::starting() { + Client::initThread("rs Manager"); + } + + void Manager::noteARemoteIsPrimary(const Member *m) { + if( rs->box.getPrimary() == m ) + return; + rs->_self->lhb() = ""; + if( rs->iAmArbiterOnly() ) { + rs->box.set(MemberState::RS_ARBITER, m); + } else { + rs->box.noteRemoteIsPrimary(m); + } + } + + /** called as the health threads get new results */ + void Manager::msgCheckNewState() { + { + theReplSet->assertValid(); + rs->assertValid(); + + RSBase::lock lk(rs); + + if( busyWithElectSelf ) return; + + const Member *p = rs->box.getPrimary(); + if( p && p != rs->_self ) { + if( !p->hbinfo().up() || + !p->hbinfo().hbstate.primary() ) + { + p = 0; + rs->box.setOtherPrimary(0); + } + } + + const Member *p2; + { + bool two; + p2 = findOtherPrimary(two); + if( two ) { + /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */ + log() << "replSet warning DIAG two primaries (transiently)" << rsLog; + return; + } + } + + if( p2 ) { + /* someone else thinks they are primary. */ + if( p == p2 ) { + // we thought the same; all set. 
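[Editor's note, not part of this patch: in the cases that follow, p is the primary this node currently believes in and p2 is a remote member claiming primacy; the node adopts p2 when it had no primary or believed in a different remote, and when it considers itself primary it yields to p2 only if it cannot see a majority of the set.]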
+ return; + } + if( p == 0 ) { + noteARemoteIsPrimary(p2); + return; + } + // todo xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + if( p != rs->_self ) { + // switch primary from oldremotep->newremotep2 + noteARemoteIsPrimary(p2); + return; + } + /* we thought we were primary, yet now someone else thinks they are. */ + if( !rs->elect.aMajoritySeemsToBeUp() ) { + /* we can't see a majority. so the other node is probably the right choice. */ + noteARemoteIsPrimary(p2); + return; + } + /* ignore for now, keep thinking we are master. + this could just be timing (we poll every couple seconds) or could indicate + a problem? if it happens consistently for a duration of time we should + alert the sysadmin. + */ + return; + } + + /* didn't find anyone who wants to be primary */ + + if( p ) { + /* we are already primary */ + + if( p != rs->_self ) { + rs->sethbmsg("error p != rs->self in checkNewState"); + log() << "replSet " << p->fullName() << rsLog; + log() << "replSet " << rs->_self->fullName() << rsLog; + return; + } + + if( rs->elect.shouldRelinquish() ) { + log() << "replSet can't see a majority of the set, relinquishing primary" << rsLog; + rs->relinquish(); + } + + return; + } + + if( !rs->iAmPotentiallyHot() ) // if not we never try to be primary + return; + + /* TODO : CHECK PRIORITY HERE. can't be elected if priority zero. */ + + /* no one seems to be primary. shall we try to elect ourself? */ + if( !rs->elect.aMajoritySeemsToBeUp() ) { + static time_t last; + static int n; + int ll = 0; + if( ++n > 5 ) ll++; + if( last + 60 > time(0 ) ) ll++; + log(ll) << "replSet can't see a majority, will not try to elect self" << rsLog; + last = time(0); + return; + } + + busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one. + } + try { + rs->elect.electSelf(); + } + catch(RetryAfterSleepException&) { + /* we want to process new inbounds before trying this again. so we just put a checkNewstate in the queue for eval later. */ + requeue(); + } + catch(...) { + log() << "replSet error unexpected assertion in rs manager" << rsLog; + } + busyWithElectSelf = false; + } + +} diff -Nru mongodb-1.4.4/db/repl/multicmd.h mongodb-1.6.3/db/repl/multicmd.h --- mongodb-1.4.4/db/repl/multicmd.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/multicmd.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,70 @@ +// @file multicmd.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/
+
+#pragma once
+
+#include "../../util/background.h"
+#include "connections.h"
+
+namespace mongo {
+
+    struct Target {
+        Target(string hostport) : toHost(hostport), ok(false) { }
+        Target() : ok(false) { }
+        string toHost;
+        bool ok;
+        BSONObj result;
+    };
+
+    /* -- implementation ------------- */
+
+    class _MultiCommandJob : public BackgroundJob {
+    public:
+        BSONObj& cmd;
+        Target& d;
+        _MultiCommandJob(BSONObj& _cmd, Target& _d) : cmd(_cmd), d(_d) { }
+    private:
+        string name() { return "MultiCommandJob"; }
+        void run() {
+            try {
+                ScopedConn c(d.toHost);
+                d.ok = c->runCommand("admin", cmd, d.result);
+            }
+            catch(DBException&) {
+                DEV log() << "dev caught dbexception on multiCommand " << d.toHost << rsLog;
+            }
+        }
+    };
+
+    inline void multiCommand(BSONObj cmd, list<Target>& L) {
+        typedef shared_ptr<_MultiCommandJob> P;
+        list<P> jobs;
+        list<BackgroundJob *> _jobs;
+
+        for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
+            Target& d = *i;
+            _MultiCommandJob *j = new _MultiCommandJob(cmd, d);
+            jobs.push_back(P(j));
+            _jobs.push_back(j);
+        }
+
+        BackgroundJob::go(_jobs);
+        BackgroundJob::wait(_jobs,5);
+    }
+
+}
diff -Nru mongodb-1.4.4/db/repl/replset_commands.cpp mongodb-1.6.3/db/repl/replset_commands.cpp
--- mongodb-1.4.4/db/repl/replset_commands.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/db/repl/replset_commands.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,305 @@
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include "../cmdline.h"
+#include "../commands.h"
+#include "health.h"
+#include "rs.h"
+#include "rs_config.h"
+#include "../dbwebserver.h"
+#include "../../util/mongoutils/html.h"
+#include "../../client/dbclient.h"
+
+namespace mongo {
+
+    void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial);
+
+    /* commands in other files:
+         replSetHeartbeat - health.cpp
+         replSetInitiate - rs_mod.cpp
+    */
+
+    bool replSetBlind = false;
+    unsigned replSetForceInitialSyncFailure = 0;
+
+    class CmdReplSetTest : public ReplSetCommand {
+    public:
+        virtual void help( stringstream &help ) const {
+            help << "Just for regression tests.\n";
+        }
+        CmdReplSetTest() : ReplSetCommand("replSetTest") { }
+        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+            log() << "replSet replSetTest command received: " << cmdObj.toString() << rsLog;
+            if( cmdObj.hasElement("forceInitialSyncFailure") ) {
+                replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number();
+                return true;
+            }
+
+            // may not need this, but if removed check all tests still work:
+            if( !check(errmsg, result) )
+                return false;
+
+            if( cmdObj.hasElement("blind") ) {
+                replSetBlind = cmdObj.getBoolField("blind");
+                return true;
+            }
+            return false;
+        }
+    } cmdReplSetTest;
+
+    class CmdReplSetGetRBID : public ReplSetCommand {
+    public:
+        /* todo: ideally this should only change on rollbacks NOT on mongod restarts also. fix...
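+           (annotation, not part of the original file: the rbid exists so a syncing peer
+           can detect a rollback on its source.  An illustrative use of the getRBID()
+           helper declared below, where 'conn' is a hypothetical DBClientConnection* to
+           the sync source:
+               int before = getRBID(conn);
+               // ... read a batch of ops from the source ...
+               if( getRBID(conn) != before ) discardBatchAndRetry();  // hypothetical handler
+           if the id changed while reading, the source rolled back and the batch is suspect.)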
*/ + int rbid; + virtual void help( stringstream &help ) const { + help << "internal"; + } + CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") { + rbid = (int) curTimeMillis(); + } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( !check(errmsg, result) ) + return false; + result.append("rbid",rbid); + return true; + } + } cmdReplSetRBID; + + using namespace bson; + void incRBID() { + cmdReplSetRBID.rbid++; + } + int getRBID(DBClientConnection *c) { + bo info; + c->simpleCommand("admin", &info, "replSetGetRBID"); + return info["rbid"].numberInt(); + } + + class CmdReplSetGetStatus : public ReplSetCommand { + public: + virtual void help( stringstream &help ) const { + help << "Report status of a replica set from the POV of this server\n"; + help << "{ replSetGetStatus : 1 }"; + help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; + } + CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( !check(errmsg, result) ) + return false; + theReplSet->summarizeStatus(result); + return true; + } + } cmdReplSetGetStatus; + + class CmdReplSetReconfig : public ReplSetCommand { + RWLock mutex; /* we don't need rw but we wanted try capability. :-( */ + public: + virtual void help( stringstream &help ) const { + help << "Adjust configuration of a replica set\n"; + help << "{ replSetReconfig : config_object }"; + help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; + } + CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { } + virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) { + try { + rwlock_try_write lk(mutex); + return _run(a,b,errmsg,c,d); + } + catch(rwlock_try_write::exception&) { } + errmsg = "a replSetReconfig is already in progress"; + return false; + } + private: + bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( !check(errmsg, result) ) + return false; + if( !theReplSet->box.getState().primary() ) { + errmsg = "replSetReconfig command must be sent to the current replica set primary."; + return false; + } + + { + // just make sure we can get a write lock before doing anything else. we'll reacquire one + // later. of course it could be stuck then, but this check lowers the risk if weird things + // are up - we probably don't want a change to apply 30 minutes after the initial attempt. + time_t t = time(0); + writelock lk(""); + if( time(0)-t > 20 ) { + errmsg = "took a long time to get write lock, so not initiating. Initiate when server less busy?"; + return false; + } + } + + if( cmdObj["replSetReconfig"].type() != Object ) { + errmsg = "no configuration specified"; + return false; + } + + /** TODO + Support changes when a majority, but not all, members of a set are up. + Determine what changes should not be allowed as they would cause erroneous states. + What should be possible when a majority is not up? 
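+           (annotation, not part of the original file: concretely, the loop in
+           checkMembersUpForConfigChange() probes every member of the new config, so with
+           members A, B, C and C unreachable a reconfig can be refused even though A and B
+           form a majority.  The relaxation contemplated above would be something like
+               bool majorityUp = 2 * nReachable > cfg.members.size();   // hypothetical
+           plus rules for which changes remain safe to apply in that state.)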
+ */
+            try {
+                ReplSetConfig newConfig(cmdObj["replSetReconfig"].Obj());
+
+                log() << "replSet replSetReconfig config object parses ok, " << newConfig.members.size() << " members specified" << rsLog;
+
+                if( !ReplSetConfig::legalChange(theReplSet->getConfig(), newConfig, errmsg) ) {
+                    return false;
+                }
+
+                checkMembersUpForConfigChange(newConfig,false);
+
+                log() << "replSet replSetReconfig [2]" << rsLog;
+
+                theReplSet->haveNewConfig(newConfig, true);
+                ReplSet::startupStatusMsg = "replSetReconfig'd";
+            }
+            catch( DBException& e ) {
+                log() << "replSet replSetReconfig exception: " << e.what() << rsLog;
+                throw;
+            }
+
+            return true;
+        }
+    } cmdReplSetReconfig;
+
+    class CmdReplSetFreeze : public ReplSetCommand {
+    public:
+        virtual void help( stringstream &help ) const {
+            help << "Enable / disable failover for the set - locks current primary as primary even if issues occur.\nFor use during system maintenance.\n";
+            help << "{ replSetFreeze : <seconds> }";
+            help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
+        }
+
+        CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { }
+        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+            if( !check(errmsg, result) )
+                return false;
+            errmsg = "not yet implemented"; /*TODO*/
+            return false;
+        }
+    } cmdReplSetFreeze;
+
+    class CmdReplSetStepDown: public ReplSetCommand {
+    public:
+        virtual void help( stringstream &help ) const {
+            help << "Step down as primary.  Will not try to reelect self for 1 minute.\n";
+            help << "(If another member with same priority takes over in the meantime, it will stay primary.)\n";
+            help << "http://www.mongodb.org/display/DOCS/Replica+Set+Commands";
+        }
+
+        CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { }
+        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+            if( !check(errmsg, result) )
+                return false;
+            if( !theReplSet->box.getState().primary() ) {
+                errmsg = "not primary so can't step down";
+                return false;
+            }
+            return theReplSet->stepDown();
+        }
+    } cmdReplSetStepDown;
+
+    using namespace bson;
+    using namespace mongoutils::html;
+    extern void fillRsLog(stringstream&);
+
+    class ReplSetHandler : public DbWebHandler {
+    public:
+        ReplSetHandler() : DbWebHandler( "_replSet" , 1 , true ){}
+
+        virtual bool handles( const string& url ) const {
+            return startsWith( url , "/_replSet" );
+        }
+
+        virtual void handle( const char *rq, string url,
+                             string& responseMsg, int& responseCode,
+                             vector<string>& headers, const SockAddr &from ){
+
+            string s = str::after(url, "/_replSetOplog?");
+            if( !s.empty() )
+                responseMsg = _replSetOplog(s);
+            else
+                responseMsg = _replSet();
+            responseCode = 200;
+        }
+
+        string _replSetOplog(string parms) {
+            stringstream s;
+            string t = "Replication oplog";
+            s << start(t);
+            s << p(t);
+
+            if( theReplSet == 0 ) {
+                if( cmdLine._replSet.empty() )
+                    s << p("Not using --replSet");
+                else {
+                    s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated")
+                           + ".<br>" + ReplSet::startupStatusMsg);
+                }
+            }
+            else {
+                try {
+                    theReplSet->getOplogDiagsAsHtml(stringToNum(parms.c_str()), s);
+                }
+                catch(std::exception& e) {
+                    s << "error querying oplog: " << e.what() << '\n';
+                }
+            }
+
+            s << _end();
+            return s.str();
+        }
+
+        /* /_replSet show replica set status in html format */
+        string _replSet() {
+            stringstream s;
+            s << start("Replica Set Status " + prettyHostName());
+            s << p( a("/", "back", "Home") + " | " +
+                    a("/local/system.replset/?html=1", "", "View Replset Config") + " | " +
+                    a("/replSetGetStatus?text", "", "replSetGetStatus") + " | " +
+                    a("http://www.mongodb.org/display/DOCS/Replica+Sets", "", "Docs")
+                  );
+
+            if( theReplSet == 0 ) {
+                if( cmdLine._replSet.empty() )
+                    s << p("Not using --replSet");
+                else {
+                    s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated")
+                           + ".<br>
" + ReplSet::startupStatusMsg); + } + } + else { + try { + theReplSet->summarizeAsHtml(s); + } + catch(...) { s << "error summarizing replset status\n"; } + } + s << p("Recent replset log activity:"); + fillRsLog(s); + s << _end(); + return s.str(); + } + + + + } replSetHandler; + +} diff -Nru mongodb-1.4.4/db/repl/rs_config.cpp mongodb-1.6.3/db/repl/rs_config.cpp --- mongodb-1.4.4/db/repl/rs_config.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_config.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,366 @@ +// rs_config.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "rs.h" +#include "../../client/dbclient.h" +#include "../../client/syncclusterconnection.h" +#include "../../util/hostandport.h" +#include "../dbhelpers.h" +#include "connections.h" +#include "../oplog.h" + +using namespace bson; + +namespace mongo { + + void logOpInitiate(const bo&); + + void assertOnlyHas(BSONObj o, const set& fields) { + BSONObj::iterator i(o); + while( i.more() ) { + BSONElement e = i.next(); + if( !fields.count( e.fieldName() ) ) { + uasserted(13434, str::stream() << "unexpected field '" << e.fieldName() << "'in object"); + } + } + } + + list ReplSetConfig::otherMemberHostnames() const { + list L; + for( vector::const_iterator i = members.begin(); i != members.end(); i++ ) { + if( !i->h.isSelf() ) + L.push_back(i->h); + } + return L; + } + + /* comment MUST only be set when initiating the set by the initiator */ + void ReplSetConfig::saveConfigLocally(bo comment) { + checkRsConfig(); + log() << "replSet info saving a newer config version to local.system.replset" << rsLog; + { + writelock lk(""); + Client::Context cx( rsConfigNs ); + cx.db()->flushFiles(true); + + //theReplSet->lastOpTimeWritten = ??; + //rather than above, do a logOp()? probably + BSONObj o = asBson(); + Helpers::putSingletonGod(rsConfigNs.c_str(), o, false/*logOp=false; local db so would work regardless...*/); + if( !comment.isEmpty() ) + logOpInitiate(comment); + + cx.db()->flushFiles(true); + } + DEV log() << "replSet saveConfigLocally done" << rsLog; + } + + /*static*/ + /*void ReplSetConfig::receivedNewConfig(BSONObj cfg) { + if( theReplSet ) + return; // this is for initial setup only, so far. 
todo + + ReplSetConfig c(cfg); + + writelock lk("admin."); + if( theReplSet ) + return; + c.saveConfigLocally(bo()); + }*/ + + bo ReplSetConfig::MemberCfg::asBson() const { + bob b; + b << "_id" << _id; + b.append("host", h.toString()); + if( votes != 1 ) b << "votes" << votes; + if( priority != 1.0 ) b << "priority" << priority; + if( arbiterOnly ) b << "arbiterOnly" << true; + if( slaveDelay ) b << "slaveDelay" << slaveDelay; + if( hidden ) b << "hidden" << hidden; + return b.obj(); + } + + bo ReplSetConfig::asBson() const { + bob b; + b.append("_id", _id).append("version", version); + if( !ho.isDefault() || !getLastErrorDefaults.isEmpty() ) { + bob settings; + if( !ho.isDefault() ) + settings << "heartbeatConnRetries " << ho.heartbeatConnRetries << + "heartbeatSleep" << ho.heartbeatSleepMillis / 1000 << + "heartbeatTimeout" << ho.heartbeatTimeoutMillis / 1000; + if( !getLastErrorDefaults.isEmpty() ) + settings << "getLastErrorDefaults" << getLastErrorDefaults; + b << "settings" << settings.obj(); + } + + BSONArrayBuilder a; + for( unsigned i = 0; i < members.size(); i++ ) + a.append( members[i].asBson() ); + b.append("members", a.arr()); + + return b.obj(); + } + + static inline void mchk(bool expr) { + uassert(13126, "bad Member config", expr); + } + + void ReplSetConfig::MemberCfg::check() const{ + mchk(_id >= 0 && _id <= 255); + mchk(priority >= 0 && priority <= 1000); + mchk(votes >= 0 && votes <= 100); + uassert(13419, "this version of mongod only supports priorities 0 and 1", priority == 0 || priority == 1); + uassert(13437, "slaveDelay requires priority be zero", slaveDelay == 0 || priority == 0); + uassert(13438, "bad slaveDelay value", slaveDelay >= 0 && slaveDelay <= 3600 * 24 * 366); + uassert(13439, "priority must be 0 when hidden=true", priority == 0 || !hidden); + } + + /** @param o old config + @param n new config + */ + /*static*/ bool ReplSetConfig::legalChange(const ReplSetConfig& o, const ReplSetConfig& n, string& errmsg) { + assert( theReplSet ); + + if( o._id != n._id ) { + errmsg = "set name may not change"; + return false; + } + /* TODO : wonder if we need to allow o.version < n.version only, which is more lenient. + if someone had some intermediate config this node doesnt have, that could be + necessary. but then how did we become primary? so perhaps we are fine as-is. + */ + if( o.version + 1 != n.version ) { + errmsg = "version number wrong"; + return false; + } + + map old; + for( vector::const_iterator i = o.members.begin(); i != o.members.end(); i++ ) { + old[i->h] = &(*i); + } + int me = 0; + for( vector::const_iterator i = n.members.begin(); i != n.members.end(); i++ ) { + const ReplSetConfig::MemberCfg& m = *i; + if( old.count(m.h) ) { + if( old[m.h]->_id != m._id ) { + log() << "replSet reconfig error with member: " << m.h.toString() << rsLog; + uasserted(13432, "_id may not change for members"); + } + } + if( m.h.isSelf() ) + me++; + } + + uassert(13433, "can't find self in new replset config", me == 1); + + /* TODO : MORE CHECKS HERE */ + + log() << "replSet TODO : don't allow removal of a node until we handle it at the removed node end?" << endl; + // we could change its votes to zero perhaps instead as a short term... 
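+        /* (annotation, not part of the original file -- illustrative inputs for the checks above:
+             old:    { _id:"myset", version:3, members:[ {_id:0, host:"a:27017"}, {_id:1, host:"b:27017"} ] }
+             legal:  { _id:"myset", version:4, members:[ {_id:0, host:"a:27017"}, {_id:1, host:"b:27017"}, {_id:2, host:"c:27017"} ] }
+             rejected: version 5 ("version number wrong"), _id "other" ("set name may not change"),
+                       or "a:27017" reappearing under a different _id ("_id may not change for members").) */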
+ + return true; + } + + void ReplSetConfig::clear() { + version = -5; + _ok = false; + } + + void ReplSetConfig::checkRsConfig() const { + uassert(13132, + "nonmatching repl set name in _id field; check --replSet command line", + _id == cmdLine.ourSetName()); + uassert(13308, "replSet bad config version #", version > 0); + uassert(13133, "replSet bad config no members", members.size() >= 1); + uassert(13309, "replSet bad config maximum number of members is 7 (for now)", members.size() <= 7); + } + + void ReplSetConfig::from(BSONObj o) { + static const string legal[] = {"_id","version", "members","settings"}; + static const set legals(legal, legal + 4); + assertOnlyHas(o, legals); + + md5 = o.md5(); + _id = o["_id"].String(); + if( o["version"].ok() ) { + version = o["version"].numberInt(); + uassert(13115, "bad " + rsConfigNs + " config: version", version > 0); + } + + if( o["settings"].ok() ) { + BSONObj settings = o["settings"].Obj(); + if( settings["heartbeatConnRetries "].ok() ) + ho.heartbeatConnRetries = settings["heartbeatConnRetries "].numberInt(); + if( settings["heartbeatSleep"].ok() ) + ho.heartbeatSleepMillis = (unsigned) (settings["heartbeatSleep"].Number() * 1000); + if( settings["heartbeatTimeout"].ok() ) + ho.heartbeatTimeoutMillis = (unsigned) (settings["heartbeatTimeout"].Number() * 1000); + ho.check(); + try { getLastErrorDefaults = settings["getLastErrorDefaults"].Obj().copy(); } catch(...) { } + } + + set hosts; + set ords; + vector members; + try { + members = o["members"].Array(); + } + catch(...) { + uasserted(13131, "replSet error parsing (or missing) 'members' field in config object"); + } + + unsigned localhosts = 0; + for( unsigned i = 0; i < members.size(); i++ ) { + BSONObj mobj = members[i].Obj(); + MemberCfg m; + try { + static const string legal[] = {"_id","votes","priority","host","hidden","slaveDelay","arbiterOnly"}; + static const set legals(legal, legal + 7); + assertOnlyHas(mobj, legals); + + try { + m._id = (int) mobj["_id"].Number(); + } catch(...) { + /* TODO: use of string exceptions may be problematic for reconfig case! */ + throw "_id must be numeric"; + } + string s; + try { + s = mobj["host"].String(); + m.h = HostAndPort(s); + } + catch(...) { + throw string("bad or missing host field? 
") + mobj.toString(); + } + if( m.h.isLocalHost() ) + localhosts++; + m.arbiterOnly = mobj.getBoolField("arbiterOnly"); + m.slaveDelay = mobj["slaveDelay"].numberInt(); + if( mobj.hasElement("hidden") ) + m.hidden = mobj.getBoolField("hidden"); + if( mobj.hasElement("priority") ) + m.priority = mobj["priority"].Number(); + if( mobj.hasElement("votes") ) + m.votes = (unsigned) mobj["votes"].Number(); + m.check(); + } + catch( const char * p ) { + log() << "replSet cfg parsing exception for members[" << i << "] " << p << rsLog; + stringstream ss; + ss << "replSet members[" << i << "] " << p; + uassert(13107, ss.str(), false); + } + catch(DBException& e) { + log() << "replSet cfg parsing exception for members[" << i << "] " << e.what() << rsLog; + stringstream ss; + ss << "bad config for member[" << i << "] " << e.what(); + uassert(13135, ss.str(), false); + } + if( !(ords.count(m._id) == 0 && hosts.count(m.h.toString()) == 0) ) { + log() << "replSet " << o.toString() << rsLog; + uassert(13108, "bad replset config -- duplicate hosts in the config object?", false); + } + hosts.insert(m.h.toString()); + ords.insert(m._id); + this->members.push_back(m); + } + uassert(13393, "can't use localhost in repl set member names except when using it for all members", localhosts == 0 || localhosts == members.size()); + uassert(13117, "bad " + rsConfigNs + " config", !_id.empty()); + } + + static inline void configAssert(bool expr) { + uassert(13122, "bad repl set config?", expr); + } + + ReplSetConfig::ReplSetConfig(BSONObj cfg) { + clear(); + from(cfg); + configAssert( version < 0 /*unspecified*/ || (version >= 1 && version <= 5000) ); + if( version < 1 ) + version = 1; + _ok = true; + } + + ReplSetConfig::ReplSetConfig(const HostAndPort& h) { + clear(); + int level = 2; + DEV level = 0; + //log(0) << "replSet load config from: " << h.toString() << rsLog; + + auto_ptr c; + int v = -5; + try { + if( h.isSelf() ) { + ; + } + else { + /* first, make sure other node is configured to be a replset. just to be safe. */ + string setname = cmdLine.ourSetName(); + BSONObj cmd = BSON( "replSetHeartbeat" << setname ); + int theirVersion; + BSONObj info; + bool ok = requestHeartbeat(setname, "", h.toString(), info, -2, theirVersion); + if( info["rs"].trueValue() ) { + // yes, it is a replicate set, although perhaps not yet initialized + } + else { + if( !ok ) { + log() << "replSet TEMP !ok heartbeating " << h.toString() << " on cfg load" << rsLog; + if( !info.isEmpty() ) + log() << "replSet info " << h.toString() << " : " << info.toString() << rsLog; + return; + } + { + stringstream ss; + ss << "replSet error: member " << h.toString() << " is not in --replSet mode"; + msgassertedNoTrace(13260, ss.str().c_str()); // not caught as not a user exception - we want it not caught + //for python err# checker: uassert(13260, "", false); + } + } + } + + v = -4; + ScopedConn conn(h.toString()); + v = -3; + c = conn->query(rsConfigNs); + if( c.get() == 0 ) { + version = v; return; + } + if( !c->more() ) { + version = EMPTYCONFIG; + return; + } + version = -1; + } + catch( DBException& e) { + version = v; + log(level) << "replSet load config couldn't get from " << h.toString() << ' ' << e.what() << rsLog; + return; + } + + BSONObj o = c->nextSafe(); + uassert(13109, "multiple rows in " + rsConfigNs + " not supported", !c->more()); + from(o); + checkRsConfig(); + _ok = true; + log(level) << "replSet load config ok from " << (h.isSelf() ? 
"self" : h.toString()) << rsLog; + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_config.h mongodb-1.6.3/db/repl/rs_config.h --- mongodb-1.4.4/db/repl/rs_config.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_config.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,97 @@ +// rs_config.h +// repl set configuration +// + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../../util/hostandport.h" +#include "health.h" + +namespace mongo { + + /* singleton config object is stored here */ + const string rsConfigNs = "local.system.replset"; + + class ReplSetConfig { + enum { EMPTYCONFIG = -2 }; + public: + /* if something is misconfigured, throws an exception. + if couldn't be queried or is just blank, ok() will be false. + */ + ReplSetConfig(const HostAndPort& h); + + ReplSetConfig(BSONObj cfg); + + bool ok() const { return _ok; } + + struct MemberCfg { + MemberCfg() : _id(-1), votes(1), priority(1.0), arbiterOnly(false), slaveDelay(0), hidden(false) { } + int _id; /* ordinal */ + unsigned votes; /* how many votes this node gets. default 1. */ + HostAndPort h; + double priority; /* 0 means can never be primary */ + bool arbiterOnly; + int slaveDelay; /* seconds. int rather than unsigned for convenient to/front bson conversion. */ + bool hidden; /* if set, don't advertise to drives in isMaster. for non-primaries (priority 0) */ + + void check() const; /* check validity, assert if not. */ + BSONObj asBson() const; + bool potentiallyHot() const { + return !arbiterOnly && priority > 0; + } + bool operator==(const MemberCfg& r) const { + return _id==r._id && votes == r.votes && h == r.h && priority == r.priority && + arbiterOnly == r.arbiterOnly && slaveDelay == r.slaveDelay && hidden == r.hidden; + } + bool operator!=(const MemberCfg& r) const { return !(*this == r); } + }; + + vector members; + string _id; + int version; + HealthOptions ho; + string md5; + BSONObj getLastErrorDefaults; + + list otherMemberHostnames() const; // except self + + /** @return true if could connect, and there is no cfg object there at all */ + bool empty() const { return version == EMPTYCONFIG; } + + string toString() const { return asBson().toString(); } + + /** validate the settings. does not call check() on each member, you have to do that separately. 
*/ + void checkRsConfig() const; + + /** check if modification makes sense */ + static bool legalChange(const ReplSetConfig& old, const ReplSetConfig& n, string& errmsg); + + //static void receivedNewConfig(BSONObj); + void saveConfigLocally(BSONObj comment); // to local db + string saveConfigEverywhere(); // returns textual info on what happened + + BSONObj asBson() const; + + private: + bool _ok; + void from(BSONObj); + void clear(); + }; + +} diff -Nru mongodb-1.4.4/db/repl/rs.cpp mongodb-1.6.3/db/repl/rs.cpp --- mongodb-1.4.4/db/repl/rs.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,579 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../cmdline.h" +#include "../../util/sock.h" +#include "../client.h" +#include "../../client/dbclient.h" +#include "../dbhelpers.h" +#include "rs.h" + +namespace mongo { + + using namespace bson; + + bool replSet = false; + ReplSet *theReplSet = 0; + extern string *discoveredSeed; + + void ReplSetImpl::sethbmsg(string s, int logLevel) { + static time_t lastLogged; + _hbmsgTime = time(0); + + if( s == _hbmsg ) { + // unchanged + if( _hbmsgTime - lastLogged < 60 ) + return; + } + + unsigned sz = s.size(); + if( sz >= 256 ) + memcpy(_hbmsg, s.c_str(), 255); + else { + _hbmsg[sz] = 0; + memcpy(_hbmsg, s.c_str(), sz); + } + if( !s.empty() ) { + lastLogged = _hbmsgTime; + log(logLevel) << "replSet " << s << rsLog; + } + } + + void ReplSetImpl::assumePrimary() { + assert( iAmPotentiallyHot() ); + writelock lk("admin."); // so we are synchronized with _logOp() + box.setSelfPrimary(_self); + //log() << "replSet PRIMARY" << rsLog; // self (" << _self->id() << ") is now primary" << rsLog; + } + + void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); } + + void ReplSetImpl::relinquish() { + if( box.getState().primary() ) { + log() << "replSet relinquishing primary state" << rsLog; + changeState(MemberState::RS_RECOVERING); + + /* close sockets that were talking to us */ + /*log() << "replSet closing sockets after reqlinquishing primary" << rsLog; + MessagingPort::closeAllSockets(1);*/ + + // todo: > + //changeState(MemberState::RS_SECONDARY); + } + else if( box.getState().startup2() ) { + // ? add comment + changeState(MemberState::RS_RECOVERING); + } + } + + /* look freshly for who is primary - includes relinquishing ourself. 
*/
+    void ReplSetImpl::forgetPrimary() {
+        if( box.getState().primary() )
+            relinquish();
+        else {
+            box.setOtherPrimary(0);
+        }
+    }
+
+    bool ReplSetImpl::_stepDown() {
+        lock lk(this);
+        if( box.getState().primary() ) {
+            changeState(MemberState::RS_RECOVERING);
+            elect.steppedDown = time(0) + 60;
+            log() << "replSet info stepped down as primary" << rsLog;
+            return true;
+        }
+        return false;
+    }
+
+    void ReplSetImpl::msgUpdateHBInfo(HeartbeatInfo h) {
+        for( Member *m = _members.head(); m; m=m->next() ) {
+            if( m->id() == h.id() ) {
+                m->_hbinfo = h;
+                return;
+            }
+        }
+    }
+
+    list<HostAndPort> ReplSetImpl::memberHostnames() const {
+        list<HostAndPort> L;
+        L.push_back(_self->h());
+        for( Member *m = _members.head(); m; m = m->next() )
+            L.push_back(m->h());
+        return L;
+    }
+
+    void ReplSetImpl::_fillIsMasterHost(const Member *m, vector<string>& hosts, vector<string>& passives, vector<string>& arbiters) {
+        if( m->config().hidden )
+            return;
+
+        if( m->potentiallyHot() ) {
+            hosts.push_back(m->h().toString());
+        }
+        else if( !m->config().arbiterOnly ) {
+            if( m->config().slaveDelay ) {
+                /* hmmm - we don't list these as they are stale. */
+            } else {
+                passives.push_back(m->h().toString());
+            }
+        }
+        else {
+            arbiters.push_back(m->h().toString());
+        }
+    }
+
+    void ReplSetImpl::_fillIsMaster(BSONObjBuilder& b) {
+        const StateBox::SP sp = box.get();
+        bool isp = sp.state.primary();
+        b.append("setName", name());
+        b.append("ismaster", isp);
+        b.append("secondary", sp.state.secondary());
+        {
+            vector<string> hosts, passives, arbiters;
+            _fillIsMasterHost(_self, hosts, passives, arbiters);
+
+            for( Member *m = _members.head(); m; m = m->next() ) {
+                _fillIsMasterHost(m, hosts, passives, arbiters);
+            }
+
+            if( hosts.size() > 0 ) {
+                b.append("hosts", hosts);
+            }
+            if( passives.size() > 0 ) {
+                b.append("passives", passives);
+            }
+            if( arbiters.size() > 0 ) {
+                b.append("arbiters", arbiters);
+            }
+        }
+
+        if( !isp ) {
+            const Member *m = sp.primary;
+            if( m )
+                b.append("primary", m->h().toString());
+        }
+        if( myConfig().arbiterOnly )
+            b.append("arbiterOnly", true);
+        if( myConfig().slaveDelay )
+            b.append("slaveDelay", myConfig().slaveDelay);
+        if( myConfig().hidden )
+            b.append("hidden", true);
+    }
+
+    /** @param cfgString <setname>/<seedhost1>,<seedhost2> */
+
+    void parseReplsetCmdLine(string cfgString, string& setname, vector<HostAndPort>& seeds, set<HostAndPort>& seedSet ) {
+        const char *p = cfgString.c_str();
+        const char *slash = strchr(p, '/');
+        if( slash )
+            setname = string(p, slash-p);
+        else
+            setname = p;
+        uassert(13093, "bad --replSet config string format is: <setname>[/<seedhost1>,<seedhost2>,...]", !setname.empty());
+
+        if( slash == 0 )
+            return;
+
+        p = slash + 1;
+        while( 1 ) {
+            const char *comma = strchr(p, ',');
+            if( comma == 0 ) comma = strchr(p,0);
+            if( p == comma )
+                break;
+            {
+                HostAndPort m;
+                try {
+                    m = HostAndPort( string(p, comma-p) );
+                }
+                catch(...)
{ + uassert(13114, "bad --replSet seed hostname", false); + } + uassert(13096, "bad --replSet command line config string - dups?", seedSet.count(m) == 0 ); + seedSet.insert(m); + //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost()); + if( m.isSelf() ) { + log(1) << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog; + } else + seeds.push_back(m); + if( *comma == 0 ) + break; + p = comma + 1; + } + } + } + + ReplSetImpl::ReplSetImpl(ReplSetCmdline& replSetCmdline) : elect(this), + _self(0), + mgr( new Manager(this) ) + { + _cfg = 0; + memset(_hbmsg, 0, sizeof(_hbmsg)); + *_hbmsg = '.'; // temp...just to see + lastH = 0; + changeState(MemberState::RS_STARTUP); + + _seeds = &replSetCmdline.seeds; + //for( vector::iterator i = seeds->begin(); i != seeds->end(); i++ ) + // addMemberIfMissing(*i); + + log(1) << "replSet beginning startup..." << rsLog; + + loadConfig(); + + unsigned sss = replSetCmdline.seedSet.size(); + for( Member *m = head(); m; m = m->next() ) { + replSetCmdline.seedSet.erase(m->h()); + } + for( set::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) { + if( i->isSelf() ) { + if( sss == 1 ) + log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog; + } else + log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog; + } + } + + void newReplUp(); + + void ReplSetImpl::loadLastOpTimeWritten() { + //assert( lastOpTimeWritten.isNull() ); + readlock lk(rsoplog); + BSONObj o; + if( Helpers::getLast(rsoplog, o) ) { + lastH = o["h"].numberLong(); + lastOpTimeWritten = o["ts"]._opTime(); + uassert(13290, "bad replSet oplog entry?", !lastOpTimeWritten.isNull()); + } + } + + /* call after constructing to start - returns fairly quickly after launching its threads */ + void ReplSetImpl::_go() { + try { + loadLastOpTimeWritten(); + } + catch(std::exception& e) { + log() << "replSet error fatal couldn't query the local " << rsoplog << " collection. Terminating mongod after 30 seconds." << rsLog; + log() << e.what() << rsLog; + sleepsecs(30); + dbexit( EXIT_REPLICATION_ERROR ); + return; + } + + changeState(MemberState::RS_STARTUP2); + startThreads(); + newReplUp(); // oplog.cpp + } + + ReplSetImpl::StartupStatus ReplSetImpl::startupStatus = PRESTART; + string ReplSetImpl::startupStatusMsg; + + extern BSONObj *getLastErrorDefault; + + /** @param reconf true if this is a reconfiguration and not an initial load of the configuration. + @return true if ok; throws if config really bad; false if config doesn't include self + */ + bool ReplSetImpl::initFromConfig(ReplSetConfig& c, bool reconf) { + /* NOTE: haveNewConfig() writes the new config to disk before we get here. So + we cannot error out at this point, except fatally. Check errors earlier. 
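+       (annotation, not part of the original file: concretely, the reconfig path in
+       ReplSet::haveNewConfig() below runs
+           newConfig.saveConfigLocally(comment);   // persisted first -- the point of no return
+           initFromConfig(newConfig, true);        // then the in-memory state is swapped
+       so legalChange() and checkMembersUpForConfigChange() must reject a bad config
+       before the save; by this point the only remaining failure mode is fatal.)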
+ */ + lock lk(this); + + if( getLastErrorDefault || !c.getLastErrorDefaults.isEmpty() ) { + // see comment in dbcommands.cpp for getlasterrordefault + getLastErrorDefault = new BSONObj( c.getLastErrorDefaults ); + } + + list newOnes; + bool additive = reconf; + { + unsigned nfound = 0; + int me = 0; + for( vector::iterator i = c.members.begin(); i != c.members.end(); i++ ) { + const ReplSetConfig::MemberCfg& m = *i; + if( m.h.isSelf() ) { + nfound++; + me++; + + if( !reconf || (_self && _self->id() == (unsigned) m._id) ) + ; + else { + log() << "replSet " << _self->id() << ' ' << m._id << rsLog; + assert(false); + } + } + else if( reconf ) { + const Member *old = findById(m._id); + if( old ) { + nfound++; + assert( (int) old->id() == m._id ); + if( old->config() == m ) { + additive = false; + } + } + else { + newOnes.push_back(&m); + } + } + } + if( me == 0 ) { + // log() << "replSet config : " << _cfg->toString() << rsLog; + log() << "replSet error can't find self in the repl set configuration:" << rsLog; + log() << c.toString() << rsLog; + assert(false); + } + uassert( 13302, "replSet error self appears twice in the repl set configuration", me<=1 ); + + if( reconf && config().members.size() != nfound ) + additive = false; + } + + _cfg = new ReplSetConfig(c); + assert( _cfg->ok() ); + assert( _name.empty() || _name == _cfg->_id ); + _name = _cfg->_id; + assert( !_name.empty() ); + + if( additive ) { + log() << "replSet info : additive change to configuration" << rsLog; + for( list::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { + const ReplSetConfig::MemberCfg* m = *i; + Member *mi = new Member(m->h, m->_id, m, false); + + /** we will indicate that new members are up() initially so that we don't relinquish our + primary state because we can't (transiently) see a majority. they should be up as we + check that new members are up before getting here on reconfig anyway. + */ + mi->get_hbinfo().health = 0.1; + + _members.push(mi); + startHealthTaskFor(mi); + } + return true; + } + + // start with no members. if this is a reconfig, drop the old ones. + _members.orphanAll(); + + endOldHealthTasks(); + + int oldPrimaryId = -1; + { + const Member *p = box.getPrimary(); + if( p ) + oldPrimaryId = p->id(); + } + forgetPrimary(); + _self = 0; + for( vector::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) { + const ReplSetConfig::MemberCfg& m = *i; + Member *mi; + if( m.h.isSelf() ) { + assert( _self == 0 ); + mi = _self = new Member(m.h, m._id, &m, true); + if( (int)mi->id() == oldPrimaryId ) + box.setSelfPrimary(mi); + } else { + mi = new Member(m.h, m._id, &m, false); + _members.push(mi); + startHealthTaskFor(mi); + if( (int)mi->id() == oldPrimaryId ) + box.setOtherPrimary(mi); + } + } + return true; + } + + // Our own config must be the first one. 
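+    /* (annotation, not part of the original file -- a worked instance of the selection
+       below: with cfgs = [ ours v3, seed v5, seed v4 ], highest points at the v5 config;
+       it is applied via initFromConfig(), and since 5 > myVersion(3) it is then persisted
+       to local.system.replset via saveConfigLocally().) */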
+ bool ReplSetImpl::_loadConfigFinish(vector& cfgs) { + int v = -1; + ReplSetConfig *highest = 0; + int myVersion = -2000; + int n = 0; + for( vector::iterator i = cfgs.begin(); i != cfgs.end(); i++ ) { + ReplSetConfig& cfg = *i; + if( ++n == 1 ) myVersion = cfg.version; + if( cfg.ok() && cfg.version > v ) { + highest = &cfg; + v = cfg.version; + } + } + assert( highest ); + + if( !initFromConfig(*highest) ) + return false; + + if( highest->version > myVersion && highest->version >= 0 ) { + log() << "replSet got config version " << highest->version << " from a remote, saving locally" << rsLog; + writelock lk("admin."); + highest->saveConfigLocally(BSONObj()); + } + return true; + } + + void ReplSetImpl::loadConfig() { + while( 1 ) { + startupStatus = LOADINGCONFIG; + startupStatusMsg = "loading " + rsConfigNs + " config (LOADINGCONFIG)"; + try { + vector configs; + try { + configs.push_back( ReplSetConfig(HostAndPort::me()) ); + } + catch(DBException& e) { + log() << "replSet exception loading our local replset configuration object : " << e.toString() << rsLog; + throw; + } + for( vector::const_iterator i = _seeds->begin(); i != _seeds->end(); i++ ) { + try { + configs.push_back( ReplSetConfig(*i) ); + } + catch( DBException& e ) { + log() << "replSet exception trying to load config from " << *i << " : " << e.toString() << rsLog; + } + } + + if( discoveredSeed ) { + try { + configs.push_back( ReplSetConfig(HostAndPort(*discoveredSeed)) ); + } + catch( DBException& ) { + log(1) << "replSet exception trying to load config from discovered seed " << *discoveredSeed << rsLog; + } + } + + int nok = 0; + int nempty = 0; + for( vector::iterator i = configs.begin(); i != configs.end(); i++ ) { + if( i->ok() ) + nok++; + if( i->empty() ) + nempty++; + } + if( nok == 0 ) { + + if( nempty == (int) configs.size() ) { + startupStatus = EMPTYCONFIG; + startupStatusMsg = "can't get " + rsConfigNs + " config from self or any seed (EMPTYCONFIG)"; + log() << "replSet can't get " << rsConfigNs << " config from self or any seed (EMPTYCONFIG)" << rsLog; + log(1) << "replSet have you ran replSetInitiate yet?" << rsLog; + if( _seeds->size() == 0 ) + log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; + } + else { + startupStatus = EMPTYUNREACHABLE; + startupStatusMsg = "can't currently get " + rsConfigNs + " config from self or any seed (EMPTYUNREACHABLE)"; + log() << "replSet can't get " << rsConfigNs << " config from self or any seed (yet)" << rsLog; + } + + sleepsecs(10); + continue; + } + + if( !_loadConfigFinish(configs) ) { + log() << "replSet info Couldn't load config yet. Sleeping 20sec and will try again." << rsLog; + sleepsecs(20); + continue; + } + } + catch(DBException& e) { + startupStatus = BADCONFIG; + startupStatusMsg = "replSet error loading set config (BADCONFIG)"; + log() << "replSet error loading configurations " << e.toString() << rsLog; + log() << "replSet error replication will not start" << rsLog; + sethbmsg("error loading set config"); + _fatal(); + throw; + } + break; + } + startupStatusMsg = "? 
started"; + startupStatus = STARTED; + } + + void ReplSetImpl::_fatal() + { + //lock l(this); + box.set(MemberState::RS_FATAL, 0); + //sethbmsg("fatal error"); + log() << "replSet error fatal, stopping replication" << rsLog; + } + + void ReplSet::haveNewConfig(ReplSetConfig& newConfig, bool addComment) { + lock l(this); // convention is to lock replset before taking the db rwlock + writelock lk(""); + bo comment; + if( addComment ) + comment = BSON( "msg" << "Reconfig set" << "version" << newConfig.version ); + newConfig.saveConfigLocally(comment); + try { + initFromConfig(newConfig, true); + log() << "replSet replSetReconfig new config saved locally" << rsLog; + } + catch(DBException& e) { + log() << "replSet error unexpected exception in haveNewConfig() : " << e.toString() << rsLog; + _fatal(); + } + catch(...) { + log() << "replSet error unexpected exception in haveNewConfig()" << rsLog; + _fatal(); + } + } + + void Manager::msgReceivedNewConfig(BSONObj o) { + log() << "replset msgReceivedNewConfig version: " << o["version"].toString() << rsLog; + ReplSetConfig c(o); + if( c.version > rs->config().version ) + theReplSet->haveNewConfig(c, false); + else { + log() << "replSet info msgReceivedNewConfig but version isn't higher " << + c.version << ' ' << rs->config().version << rsLog; + } + } + + /* forked as a thread during startup + it can run quite a while looking for config. but once found, + a separate thread takes over as ReplSetImpl::Manager, and this thread + terminates. + */ + void startReplSets(ReplSetCmdline *replSetCmdline) { + Client::initThread("startReplSets"); + try { + assert( theReplSet == 0 ); + if( replSetCmdline == 0 ) { + assert(!replSet); + return; + } + (theReplSet = new ReplSet(*replSetCmdline))->go(); + } + catch(std::exception& e) { + log() << "replSet caught exception in startReplSets thread: " << e.what() << rsLog; + if( theReplSet ) + theReplSet->fatal(); + } + cc().shutdown(); + } + +} + +namespace boost { + + void assertion_failed(char const * expr, char const * function, char const * file, long line) + { + mongo::log() << "boost assertion failure " << expr << ' ' << function << ' ' << file << ' ' << line << endl; + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_exception.h mongodb-1.6.3/db/repl/rs_exception.h --- mongodb-1.4.4/db/repl/rs_exception.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_exception.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,17 @@ +// @file rs_exception.h + +#pragma once + +namespace mongo { + + class VoteException : public std::exception { + public: + const char * what() const throw () { return "VoteException"; } + }; + + class RetryAfterSleepException : public std::exception { + public: + const char * what() const throw () { return "RetryAfterSleepException"; } + }; + +} diff -Nru mongodb-1.4.4/db/repl/rs.h mongodb-1.6.3/db/repl/rs.h --- mongodb-1.4.4/db/repl/rs.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,425 @@ +// /db/repl/rs.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../../util/concurrency/list.h" +#include "../../util/concurrency/value.h" +#include "../../util/concurrency/msg.h" +#include "../../util/hostandport.h" +#include "../commands.h" +#include "rs_exception.h" +#include "rs_optime.h" +#include "rs_member.h" +#include "rs_config.h" + +namespace mongo { + + struct HowToFixUp; + struct Target; + class DBClientConnection; + class ReplSetImpl; + class OplogReader; + extern bool replSet; // true if using repl sets + extern class ReplSet *theReplSet; // null until initialized + extern Tee *rsLog; + + /* member of a replica set */ + class Member : public List1::Base { + public: + Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self); + string fullName() const { return h().toString(); } + const ReplSetConfig::MemberCfg& config() const { return _config; } + const HeartbeatInfo& hbinfo() const { return _hbinfo; } + HeartbeatInfo& get_hbinfo() { return _hbinfo; } + string lhb() const { return _hbinfo.lastHeartbeatMsg; } + MemberState state() const { return _hbinfo.hbstate; } + const HostAndPort& h() const { return _h; } + unsigned id() const { return _hbinfo.id(); } + bool potentiallyHot() const { return _config.potentiallyHot(); } // not arbiter, not priority 0 + void summarizeMember(stringstream& s) const; + friend class ReplSetImpl; + private: + const ReplSetConfig::MemberCfg _config; + const HostAndPort _h; + HeartbeatInfo _hbinfo; + }; + + class Manager : public task::Server { + ReplSetImpl *rs; + bool busyWithElectSelf; + int _primary; + + /** @param two - if true two primaries were seen. this can happen transiently, in addition to our + polling being only occasional. in this case null is returned, but the caller should + not assume primary itself in that situation. + */ + const Member* findOtherPrimary(bool& two); + + void noteARemoteIsPrimary(const Member *); + virtual void starting(); + public: + Manager(ReplSetImpl *rs); + ~Manager(); + void msgReceivedNewConfig(BSONObj); + void msgCheckNewState(); + }; + + struct Target; + + class Consensus { + ReplSetImpl &rs; + struct LastYea { + LastYea() : when(0), who(0xffffffff) { } + time_t when; + unsigned who; + }; + Atomic ly; + unsigned yea(unsigned memberId); // throws VoteException + void electionFailed(unsigned meid); + void _electSelf(); + bool weAreFreshest(bool& allUp, int& nTies); + bool sleptLast; // slept last elect() pass + public: + Consensus(ReplSetImpl *t) : rs(*t) { + sleptLast = false; + steppedDown = 0; + } + + /* if we've stepped down, this is when we are allowed to try to elect ourself again. + todo: handle possible weirdnesses at clock skews etc. + */ + time_t steppedDown; + + int totalVotes() const; + bool aMajoritySeemsToBeUp() const; + bool shouldRelinquish() const; + void electSelf(); + void electCmdReceived(BSONObj, BSONObjBuilder*); + void multiCommand(BSONObj cmd, list& L); + }; + + /** most operations on a ReplSet object should be done while locked. that logic implemented here. */ + class RSBase : boost::noncopyable { + public: + const unsigned magic; + void assertValid() { assert( magic == 0x12345677 ); } + private: + mutex m; + int _locked; + ThreadLocalValue _lockedByMe; + protected: + RSBase() : magic(0x12345677), m("RSBase"), _locked(0) { } + ~RSBase() { + /* this can happen if we throw in the constructor; otherwise never happens. thus we log it as it is quite unusual. 
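+           (annotation, not part of the original file: aside from destruction, the intended
+           way to use this class is the nested guard defined below, re-entrant per thread
+           via _lockedByMe -- illustratively, at the top of a hypothetical member function:
+               lock lk(this);   // no-op if this thread already holds the RSBase lock
+           as _stepDown() and initFromConfig() in rs.cpp do.)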
*/ + log() << "replSet ~RSBase called" << rsLog; + } + + class lock { + RSBase& rsbase; + auto_ptr sl; + public: + lock(RSBase* b) : rsbase(*b) { + if( rsbase._lockedByMe.get() ) + return; // recursive is ok... + + sl.reset( new scoped_lock(rsbase.m) ); + DEV assert(rsbase._locked == 0); + rsbase._locked++; + rsbase._lockedByMe.set(true); + } + ~lock() { + if( sl.get() ) { + assert( rsbase._lockedByMe.get() ); + DEV assert(rsbase._locked == 1); + rsbase._lockedByMe.set(false); + rsbase._locked--; + } + } + }; + + public: + /* for asserts */ + bool locked() const { return _locked != 0; } + + /* if true, is locked, and was locked by this thread. note if false, it could be in the lock or not for another + just for asserts & such so we can make the contracts clear on who locks what when. + we don't use these locks that frequently, so the little bit of overhead is fine. + */ + bool lockedByMe() { return _lockedByMe.get(); } + }; + + class ReplSetHealthPollTask; + + /* safe container for our state that keeps member pointer and state variables always aligned */ + class StateBox : boost::noncopyable { + public: + struct SP { // SP is like pair but nicer + SP() : state(MemberState::RS_STARTUP), primary(0) { } + MemberState state; + const Member *primary; + }; + const SP get() { + scoped_lock lk(m); + return sp; + } + MemberState getState() const { return sp.state; } + const Member* getPrimary() const { return sp.primary; } + void change(MemberState s, const Member *self) { + scoped_lock lk(m); + if( sp.state != s ) { + log() << "replSet " << s.toString() << rsLog; + } + sp.state = s; + if( s.primary() ) { + sp.primary = self; + } + else { + if( self == sp.primary ) + sp.primary = 0; + } + } + void set(MemberState s, const Member *p) { + scoped_lock lk(m); + sp.state = s; sp.primary = p; + } + void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); } + void setOtherPrimary(const Member *mem) { + scoped_lock lk(m); + assert( !sp.state.primary() ); + sp.primary = mem; + } + void noteRemoteIsPrimary(const Member *remote) { + scoped_lock lk(m); + if( !sp.state.secondary() && !sp.state.fatal() ) + sp.state = MemberState::RS_RECOVERING; + sp.primary = remote; + } + StateBox() : m("StateBox") { } + private: + mutex m; + SP sp; + }; + + void parseReplsetCmdLine(string cfgString, string& setname, vector& seeds, set& seedSet ); + + /** Parameter given to the --replSet command line option (parsed). + Syntax is "/," + where setname is a name and seedhost is "[:]" */ + class ReplSetCmdline { + public: + ReplSetCmdline(string cfgString) { parseReplsetCmdLine(cfgString, setname, seeds, seedSet); } + string setname; + vector seeds; + set seedSet; + }; + + /* information about the entire repl set, such as the various servers in the set, and their state */ + /* note: We currently do not free mem when the set goes away - it is assumed the replset is a + singleton and long lived. + */ + class ReplSetImpl : protected RSBase { + public: + /** info on our state if the replset isn't yet "up". for example, if we are pre-initiation. 
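+            (annotation, not part of the original file: the typical progression, per
+            loadConfig() in rs.cpp, is PRESTART -> LOADINGCONFIG -> STARTED, looping
+            through EMPTYCONFIG or EMPTYUNREACHABLE while waiting for an initiate, or
+            stopping at BADCONFIG if the stored config cannot be parsed.)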
*/ + enum StartupStatus { + PRESTART=0, LOADINGCONFIG=1, BADCONFIG=2, EMPTYCONFIG=3, + EMPTYUNREACHABLE=4, STARTED=5, SOON=6 + }; + static StartupStatus startupStatus; + static string startupStatusMsg; + static string stateAsHtml(MemberState state); + + /* todo thread */ + void msgUpdateHBInfo(HeartbeatInfo); + + StateBox box; + + OpTime lastOpTimeWritten; + long long lastH; // hash we use to make sure we are reading the right flow of ops and aren't on an out-of-date "fork" + private: + set healthTasks; + void endOldHealthTasks(); + void startHealthTaskFor(Member *m); + + Consensus elect; + void relinquish(); + void forgetPrimary(); + protected: + bool _stepDown(); + private: + void assumePrimary(); + void loadLastOpTimeWritten(); + void changeState(MemberState s); + protected: + // "heartbeat message" + // sent in requestHeartbeat respond in field "hbm" + char _hbmsg[256]; // we change this unlocked, thus not an stl::string + time_t _hbmsgTime; // when it was logged + public: + void sethbmsg(string s, int logLevel = 0); + protected: + bool initFromConfig(ReplSetConfig& c, bool reconf=false); // true if ok; throws if config really bad; false if config doesn't include self + void _fillIsMaster(BSONObjBuilder&); + void _fillIsMasterHost(const Member*, vector&, vector&, vector&); + const ReplSetConfig& config() { return *_cfg; } + string name() const { return _name; } /* @return replica set's logical name */ + MemberState state() const { return box.getState(); } + void _fatal(); + void _getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const; + void _summarizeAsHtml(stringstream&) const; + void _summarizeStatus(BSONObjBuilder&) const; // for replSetGetStatus command + + /* throws exception if a problem initializing. */ + ReplSetImpl(ReplSetCmdline&); + + /* call afer constructing to start - returns fairly quickly after launching its threads */ + void _go(); + + private: + string _name; + const vector *_seeds; + ReplSetConfig *_cfg; + + /** load our configuration from admin.replset. try seed machines too. + @return true if ok; throws if config really bad; false if config doesn't include self + */ + bool _loadConfigFinish(vector& v); + void loadConfig(); + + list memberHostnames() const; + const ReplSetConfig::MemberCfg& myConfig() const { return _self->config(); } + bool iAmArbiterOnly() const { return myConfig().arbiterOnly; } + bool iAmPotentiallyHot() const { return myConfig().potentiallyHot(); } + protected: + Member *_self; + private: + List1 _members; /* all members of the set EXCEPT self. 
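+            (annotation, not part of the original file: because self is excluded, whole-set
+            traversals pair _self with a walk of this list, as _fillIsMaster() does:
+                _fillIsMasterHost(_self, hosts, passives, arbiters);
+                for( Member *m = _members.head(); m; m = m->next() )
+                    _fillIsMasterHost(m, hosts, passives, arbiters);
+            )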
*/ + + public: + unsigned selfId() const { return _self->id(); } + Manager *mgr; + + private: + Member* head() const { return _members.head(); } + public: + const Member* findById(unsigned id) const; + private: + void _getTargets(list&, int &configVersion); + void getTargets(list&, int &configVersion); + void startThreads(); + friend class FeedbackThread; + friend class CmdReplSetElect; + friend class Member; + friend class Manager; + friend class Consensus; + + private: + /* pulling data from primary related - see rs_sync.cpp */ + bool initialSyncOplogApplication(string hn, const Member *primary, OpTime applyGTE, OpTime minValid); + void _syncDoInitialSync(); + void syncDoInitialSync(); + void _syncThread(); + bool tryToGoLiveAsASecondary(OpTime&); // readlocks + void syncTail(); + void syncApply(const BSONObj &o); + unsigned _syncRollback(OplogReader& r); + void syncRollback(OplogReader& r); + void syncFixUp(HowToFixUp& h, OplogReader& r); + public: + void syncThread(); + }; + + class ReplSet : public ReplSetImpl { + public: + ReplSet(ReplSetCmdline& replSetCmdline) : ReplSetImpl(replSetCmdline) { } + + bool stepDown() { return _stepDown(); } + + string selfFullName() { + lock lk(this); + return _self->fullName(); + } + + /* call after constructing to start - returns fairly quickly after la[unching its threads */ + void go() { _go(); } + + void fatal() { _fatal(); } + bool isPrimary() { return box.getState().primary(); } + bool isSecondary() { return box.getState().secondary(); } + MemberState state() const { return ReplSetImpl::state(); } + string name() const { return ReplSetImpl::name(); } + const ReplSetConfig& config() { return ReplSetImpl::config(); } + void getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const { _getOplogDiagsAsHtml(server_id,ss); } + void summarizeAsHtml(stringstream& ss) const { _summarizeAsHtml(ss); } + void summarizeStatus(BSONObjBuilder& b) const { _summarizeStatus(b); } + void fillIsMaster(BSONObjBuilder& b) { _fillIsMaster(b); } + + /* we have a new config (reconfig) - apply it. + @param comment write a no-op comment to the oplog about it. only makes sense if one is primary and initiating the reconf. + */ + void haveNewConfig(ReplSetConfig& c, bool comment); + + /* if we delete old configs, this needs to assure locking. currently we don't so it is ok. */ + const ReplSetConfig& getConfig() { return config(); } + + bool lockedByMe() { return RSBase::lockedByMe(); } + + // heartbeat msg to send to others; descriptive diagnostic info + string hbmsg() const { + if( time(0)-_hbmsgTime > 120 ) return ""; + return _hbmsg; + } + }; + + /** base class for repl set commands. checks basic things such as in rs mode before the command + does its real work + */ + class ReplSetCommand : public Command { + protected: + ReplSetCommand(const char * s, bool show=false) : Command(s, show) { } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual bool logTheOp() { return false; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream &help ) const { help << "internal"; } + bool check(string& errmsg, BSONObjBuilder& result) { + if( !replSet ) { + errmsg = "not running with --replSet"; + return false; + } + if( theReplSet == 0 ) { + result.append("startupStatus", ReplSet::startupStatus); + errmsg = ReplSet::startupStatusMsg.empty() ? "replset unknown error 2" : ReplSet::startupStatusMsg; + if( ReplSet::startupStatus == 3 ) + result.append("info", "run rs.initiate(...) 
if not yet done for the set"); + return false; + } + return true; + } + }; + + /** inlines ----------------- */ + + inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) : + _config(*c), _h(h), _hbinfo(ord) + { + if( self ) + _hbinfo.health = 1.0; + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_initialsync.cpp mongodb-1.6.3/db/repl/rs_initialsync.cpp --- mongodb-1.4.4/db/repl/rs_initialsync.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_initialsync.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,214 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../client.h" +#include "../../client/dbclient.h" +#include "rs.h" +#include "../oplogreader.h" +#include "../../util/mongoutils/str.h" +#include "../dbhelpers.h" +#include "rs_optime.h" +#include "../oplog.h" + +namespace mongo { + + using namespace mongoutils; + using namespace bson; + + void dropAllDatabasesExceptLocal(); + + // add try/catch with sleep + + void isyncassert(const char *msg, bool expr) { + if( !expr ) { + string m = str::stream() << "initial sync " << msg; + theReplSet->sethbmsg(m, 0); + uasserted(13404, m); + } + } + + void ReplSetImpl::syncDoInitialSync() { + while( 1 ) { + try { + _syncDoInitialSync(); + break; + } + catch(DBException& e) { + sethbmsg("initial sync exception " + e.toString(), 0); + sleepsecs(30); + } + } + } + + bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, + bool slaveOk, bool useReplAuth, bool snapshot); + + /* todo : progress metering to sethbmsg. */ + static bool clone(const char *master, string db) { + string err; + return cloneFrom(master, err, db, false, + /*slaveok later can be true*/ false, true, false); + } + + void _logOpObjRS(const BSONObj& op); + + bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string &errmsg, bool logforrepl); + + static void emptyOplog() { + writelock lk(rsoplog); + Client::Context ctx(rsoplog); + NamespaceDetails *d = nsdetails(rsoplog); + + // temp + if( d && d->nrecords == 0 ) + return; // already empty, ok. + + log(1) << "replSet empty oplog" << rsLog; + d->emptyCappedCollection(rsoplog); + + /* + string errmsg; + bob res; + dropCollection(rsoplog, errmsg, res); + log() << "replSet recreated oplog so it is empty. todo optimize this..." << rsLog; + createOplog();*/ + + // TEMP: restart to recreate empty oplog + //log() << "replSet FATAL error during initial sync. mongod restart required." 
<< rsLog; + //dbexit( EXIT_CLEAN ); + + /* + writelock lk(rsoplog); + Client::Context c(rsoplog, dbpath, 0, doauth/false); + NamespaceDetails *oplogDetails = nsdetails(rsoplog); + uassert(13412, str::stream() << "replSet error " << rsoplog << " is missing", oplogDetails != 0); + oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); + */ + } + + void ReplSetImpl::_syncDoInitialSync() { + sethbmsg("initial sync pending",0); + + StateBox::SP sp = box.get(); + assert( !sp.state.primary() ); // wouldn't make sense if we were. + + const Member *cp = sp.primary; + if( cp == 0 ) { + sethbmsg("initial sync need a member to be primary",0); + sleepsecs(15); + return; + } + + string masterHostname = cp->h().toString(); + OplogReader r; + if( !r.connect(masterHostname) ) { + sethbmsg( str::stream() << "initial sync couldn't connect to " << cp->h().toString() , 0); + sleepsecs(15); + return; + } + + BSONObj lastOp = r.getLastOp(rsoplog); + if( lastOp.isEmpty() ) { + sethbmsg("initial sync couldn't read remote oplog", 0); + sleepsecs(15); + return; + } + OpTime startingTS = lastOp["ts"]._opTime(); + + { + /* make sure things aren't too flappy */ + sleepsecs(5); + isyncassert( "flapping?", box.getPrimary() == cp ); + BSONObj o = r.getLastOp(rsoplog); + isyncassert( "flapping [2]?", !o.isEmpty() ); + } + + sethbmsg("initial sync drop all databases", 0); + dropAllDatabasesExceptLocal(); + +// sethbmsg("initial sync drop oplog", 0); +// emptyOplog(); + + list<string> dbs = r.conn()->getDatabaseNames(); + for( list<string>::iterator i = dbs.begin(); i != dbs.end(); i++ ) { + string db = *i; + if( db != "local" ) { + sethbmsg( str::stream() << "initial sync cloning db: " << db , 0); + bool ok; + { + writelock lk(db); + Client::Context ctx(db); + ok = clone(masterHostname.c_str(), db); + } + if( !ok ) { + sethbmsg( str::stream() << "initial sync error clone of " << db << " failed sleeping 5 minutes" ,0); + sleepsecs(300); + return; + } + } + } + + sethbmsg("initial sync query minValid",0); + + /* our cloned copy will be strange until we apply oplog events that occurred + through the process. we note that time point here. */ + BSONObj minValid = r.getLastOp(rsoplog); + assert( !minValid.isEmpty() ); + OpTime mvoptime = minValid["ts"]._opTime(); + assert( !mvoptime.isNull() ); + + /* copy the oplog + */ + { + sethbmsg("initial sync copy+apply oplog"); + if( ! initialSyncOplogApplication(masterHostname, cp, startingTS, mvoptime) ) { // note we assume here that this call does not throw + log() << "replSet initial sync failed during applyoplog" << rsLog; + emptyOplog(); // otherwise we'll be up! + lastOpTimeWritten = OpTime(); + lastH = 0; + log() << "replSet cleaning up [1]" << rsLog; + { + writelock lk("local."); + Client::Context cx( "local." ); + cx.db()->flushFiles(true); + } + log() << "replSet cleaning up [2]" << rsLog; + sleepsecs(2); + return; + } + } + + sethbmsg("initial sync finishing up",0); + + assert( !box.getState().primary() ); // wouldn't make sense if we were. + + { + writelock lk("local."); + Client::Context cx( "local." ); + cx.db()->flushFiles(true); + try { + log() << "replSet set minValid=" << minValid["ts"]._opTime().toString() << rsLog; + } + catch(...)
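/* Illustrative sketch only, not part of the patch: the minValid document written
   just below is what later gates the transition to SECONDARY. The real check is
   ReplSetImpl::tryToGoLiveAsASecondary() in rs_sync.cpp; in outline it does:

       BSONObj mv;
       if( Helpers::getSingleton("local.replset.minvalid", mv) ) {
           OpTime minvalid = mv["ts"]._opTime();
           if( minvalid <= lastOpTimeWritten )
               changeState(MemberState::RS_SECONDARY);   // caught up; safe to go live
       }
*/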
{ } + Helpers::putSingleton("local.replset.minvalid", minValid); + cx.db()->flushFiles(true); + } + + sethbmsg("initial sync done",0); + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_initiate.cpp mongodb-1.6.3/db/repl/rs_initiate.cpp --- mongodb-1.4.4/db/repl/rs_initiate.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_initiate.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,238 @@ +/* @file rs_initiate.cpp + */ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "../cmdline.h" +#include "../commands.h" +#include "../../util/mmap.h" +#include "../../util/mongoutils/str.h" +#include "health.h" +#include "rs.h" +#include "rs_config.h" +#include "../dbhelpers.h" + +using namespace bson; +using namespace mongoutils; + +namespace mongo { + + /* called on a reconfig AND on initiate + throws + @param initial true when initiating + */ + void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial) { + int failures = 0; + int me = 0; + for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { + if( i->h.isSelf() ) { + me++; + if( !i->potentiallyHot() ) { + uasserted(13420, "initiation and reconfiguration of a replica set must be sent to a node that can become primary"); + } + } + } + uassert(13278, "bad config - dups?", me <= 1); // dups? + uassert(13279, "can't find self in the replset config", me == 1); + + for( vector<ReplSetConfig::MemberCfg>::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { + BSONObj res; + { + bool ok = false; + try { + int theirVersion = -1000; + ok = requestHeartbeat(cfg._id, "", i->h.toString(), res, -1, theirVersion, initial/*check if empty*/); + if( theirVersion >= cfg.version ) { + stringstream ss; + ss << "replSet member " << i->h.toString() << " has too new a config version (" << theirVersion << ") to reconfigure"; + uasserted(13259, ss.str()); + } + } + catch(DBException& e) { + log() << "replSet cmufcc requestHeartbeat " << i->h.toString() << " : " << e.toString() << rsLog; + } + catch(...) { + log() << "replSet cmufcc error exception in requestHeartbeat?" << rsLog; + } + if( res.getBoolField("mismatch") ) + uasserted(13145, "set name does not match the set name host " + i->h.toString() + " expects"); + if( *res.getStringField("set") ) { + if( cfg.version <= 1 ) { + // this was to be initiation, no one should be initiated already. + uasserted(13256, "member " + i->h.toString() + " is already initiated"); + } + else { + // Assure no one has a newer config. + if( res["v"].Int() >= cfg.version ) { + uasserted(13341, "member " + i->h.toString() + " has a config version >= to the new cfg version; cannot change config"); + } + } + } + if( !ok && !res["rs"].trueValue() ) { + if( !res.isEmpty() ) { + /* strange. got a response, but not "ok". log it.
*/ + log() << "replSet warning " << i->h.toString() << " replied: " << res.toString() << rsLog; + } + + bool allowFailure = false; + failures++; + if( res.isEmpty() && !initial && failures == 1 ) { + /* for now we are only allowing 1 node to be down on a reconfig. this can be made to be a minority + trying to keep change small as release is near. + */ + const Member* m = theReplSet->findById( i->_id ); + if( m ) { + // ok, so this was an existing member (wouldn't make sense to add to config a new member that is down) + assert( m->h().toString() == i->h.toString() ); + allowFailure = true; + } + } + + if( !allowFailure ) { + string msg = string("need members up to initiate, not ok : ") + i->h.toString(); + if( !initial ) + msg = string("need most members up to reconfigure, not ok : ") + i->h.toString(); + uasserted(13144, msg); + } + } + } + if( initial ) { + bool hasData = res["hasData"].Bool(); + uassert(13311, "member " + i->h.toString() + " has data already, cannot initiate set. All members except initiator must be empty.", + !hasData || i->h.isSelf()); + } + } + } + + class CmdReplSetInitiate : public ReplSetCommand { + public: + virtual LockType locktype() const { return NONE; } + CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") { } + virtual void help(stringstream& h) const { + h << "Initiate/christen a replica set."; + h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; + } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + log() << "replSet replSetInitiate admin command received from client" << rsLog; + + if( !replSet ) { + errmsg = "server is not running with --replSet"; + return false; + } + if( theReplSet ) { + errmsg = "already initialized"; + result.append("info", "try querying " + rsConfigNs + " to see current configuration"); + return false; + } + + { + // just make sure we can get a write lock before doing anything else. we'll reacquire one + // later. of course it could be stuck then, but this check lowers the risk if weird things + // are up. + time_t t = time(0); + writelock lk(""); + if( time(0)-t > 10 ) { + errmsg = "took a long time to get write lock, so not initiating. Initiate when server less busy?"; + return false; + } + + /* check that we don't already have an oplog. that could cause issues. + it is ok if the initiating member has *other* data than that. + */ + BSONObj o; + if( Helpers::getFirst(rsoplog, o) ) { + errmsg = rsoplog + string(" is not empty on the initiating member. cannot initiate."); + return false; + } + } + + if( ReplSet::startupStatus == ReplSet::BADCONFIG ) { + errmsg = "server already in BADCONFIG state (check logs); not initiating"; + result.append("info", ReplSet::startupStatusMsg); + return false; + } + if( ReplSet::startupStatus != ReplSet::EMPTYCONFIG ) { + result.append("startupStatus", ReplSet::startupStatus); + errmsg = "all members and seeds must be reachable to initiate set"; + result.append("info", cmdLine._replSet); + return false; + } + + BSONObj configObj; + + if( cmdObj["replSetInitiate"].type() != Object ) { + result.append("info2", "no configuration explicitly specified -- making one"); + log() << "replSet info initiate : no configuration specified. Using a default configuration for the set" << rsLog; + + string name; + vector seeds; + set seedSet; + parseReplsetCmdLine(cmdLine._replSet, name, seeds, seedSet); // may throw... 
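/* For illustration: with a hypothetical command line of
       mongod --replSet mySet/otherHost:27002
   the builder code below produces a default config object shaped like

       { _id : "mySet",
         members : [ { _id : 0, host : "thisHost:27017" },
                     { _id : 1, host : "otherHost:27002" } ] }

   Host names here are examples only; member 0 is always this server
   (HostAndPort::Me()).
*/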
+ + bob b; + b.append("_id", name); + bob members; + members.append("0", BSON( "_id" << 0 << "host" << HostAndPort::Me().toString() )); + for( unsigned i = 0; i < seeds.size(); i++ ) + members.append(bob::numStr(i+1), BSON( "_id" << i+1 << "host" << seeds[i].toString())); + b.appendArray("members", members.obj()); + configObj = b.obj(); + log() << "replSet created this configuration for initiation : " << configObj.toString() << rsLog; + } + else { + configObj = cmdObj["replSetInitiate"].Obj(); + } + + bool parsed = false; + try { + ReplSetConfig newConfig(configObj); + parsed = true; + + if( newConfig.version > 1 ) { + errmsg = "can't initiate with a version number greater than 1"; + return false; + } + + log() << "replSet replSetInitiate config object parses ok, " << newConfig.members.size() << " members specified" << rsLog; + + checkMembersUpForConfigChange(newConfig, true); + + log() << "replSet replSetInitiate all members seem up" << rsLog; + + writelock lk(""); + bo comment = BSON( "msg" << "initiating set"); + newConfig.saveConfigLocally(comment); + log() << "replSet replSetInitiate config now saved locally. Should come online in about a minute." << rsLog; + result.append("info", "Config now saved locally. Should come online in about a minute."); + ReplSet::startupStatus = ReplSet::SOON; + ReplSet::startupStatusMsg = "Received replSetInitiate - should come online shortly."; + } + catch( DBException& e ) { + log() << "replSet replSetInitiate exception: " << e.what() << rsLog; + if( !parsed ) + errmsg = string("couldn't parse cfg object ") + e.what(); + else + errmsg = string("couldn't initiate : ") + e.what(); + return false; + } + + return true; + } + } cmdReplSetInitiate; + +} diff -Nru mongodb-1.4.4/db/repl/rs_member.h mongodb-1.6.3/db/repl/rs_member.h --- mongodb-1.4.4/db/repl/rs_member.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_member.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,100 @@ +// @file rsmember.h +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +/** replica set member */ + +#pragma once + +namespace mongo { + + + /* + RS_STARTUP serving still starting up, or still trying to initiate the set + RS_PRIMARY this server thinks it is primary + RS_SECONDARY this server thinks it is a secondary (slave mode) + RS_RECOVERING recovering/resyncing; after recovery usually auto-transitions to secondary + RS_FATAL something bad has occurred and server is not completely offline with regard to the replica set. fatal error. 
+ RS_STARTUP2 loaded config, still determining who is primary + */ + struct MemberState { + enum MS { + RS_STARTUP, + RS_PRIMARY, + RS_SECONDARY, + RS_RECOVERING, + RS_FATAL, + RS_STARTUP2, + RS_UNKNOWN, /* remote node not yet reached */ + RS_ARBITER, + RS_DOWN, /* node not reachable for a report */ + RS_ROLLBACK + } s; + + MemberState(MS ms = RS_UNKNOWN) : s(ms) { } + explicit MemberState(int ms) : s((MS) ms) { } + + bool primary() const { return s == RS_PRIMARY; } + bool secondary() const { return s == RS_SECONDARY; } + bool recovering() const { return s == RS_RECOVERING; } + bool startup2() const { return s == RS_STARTUP2; } + bool fatal() const { return s == RS_FATAL; } + bool rollback() const { return s == RS_ROLLBACK; } + + string toString() const; + + bool operator==(const MemberState& r) const { return s == r.s; } + bool operator!=(const MemberState& r) const { return s != r.s; } + }; + + /* this is supposed to be just basic information on a member, + and copy constructable. */ + class HeartbeatInfo { + unsigned _id; + public: + HeartbeatInfo() : _id(0xffffffff),hbstate(MemberState::RS_UNKNOWN),health(-1.0),downSince(0),skew(INT_MIN) { } + HeartbeatInfo(unsigned id); + bool up() const { return health > 0; } + unsigned id() const { return _id; } + MemberState hbstate; + double health; + time_t upSince; + long long downSince; + time_t lastHeartbeat; + string lastHeartbeatMsg; + OpTime opTime; + int skew; + + long long timeDown() const; // ms + + /* true if changed in a way of interest to the repl set manager. */ + bool changed(const HeartbeatInfo& old) const; + }; + + inline HeartbeatInfo::HeartbeatInfo(unsigned id) : _id(id) { + hbstate = MemberState::RS_UNKNOWN; + health = -1.0; + downSince = 0; + lastHeartbeat = upSince = 0; + skew = INT_MIN; + } + + inline bool HeartbeatInfo::changed(const HeartbeatInfo& old) const { + return health != old.health || + hbstate != old.hbstate; + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_optime.h mongodb-1.6.3/db/repl/rs_optime.h --- mongodb-1.4.4/db/repl/rs_optime.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_optime.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,58 @@ +// @file rs_optime.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "../../util/optime.h" + +namespace mongo { + + const char rsoplog[] = "local.oplog.rs"; + + /* + class RSOpTime : public OpTime { + public: + bool initiated() const { return getSecs() != 0; } + };*/ + + /*struct RSOpTime { + unsigned long long ord; + + RSOpTime() : ord(0) { } + + bool initiated() const { return ord > 0; } + + void initiate() { + assert( !initiated() ); + ord = 1000000; + } + + ReplTime inc() { + DEV assertInWriteLock(); + return ++ord; + } + + string toString() const { return str::stream() << ord; } + + // query the oplog and set the highest value herein. acquires a db read lock. throws. 
+ void load(); + }; + + extern RSOpTime rsOpTime;*/ + +} diff -Nru mongodb-1.4.4/db/repl/rs_rollback.cpp mongodb-1.6.3/db/repl/rs_rollback.cpp --- mongodb-1.4.4/db/repl/rs_rollback.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_rollback.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,672 @@ +/* @file rs_rollback.cpp +* +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "../client.h" +#include "../../client/dbclient.h" +#include "rs.h" +#include "../repl.h" +#include "../query.h" + +/* Scenarios + + We went offline with ops not replicated out. + + F = node that failed and is coming back. + P = node that took over, new primary + + #1: + F : a b c d e f g + P : a b c d q + + The design is "keep P". One could argue here that "keep F" has some merits, however, in most cases P + will have significantly more data. Also note that P may have a proper subset of F's stream if there were + no subsequent writes. + + For now the model is simply : get F back in sync with P. If P was really behind or something, we should have + just chosen not to fail over anyway. + + #2: + F : a b c d e f g -> a b c d + P : a b c d + + #3: + F : a b c d e f g -> a b c d q r s t u v w x z + P : a b c d q r s t u v w x z + + Steps + find an event in common. 'd'. + undo our events beyond that by: + (1) taking copy from other server of those objects + (2) do not consider copy valid until we reach an optime after when we fetched the new version of object + -- i.e., reset minvalid. + (3) we could skip operations on objects that are previous in time to our capture of the object as an optimization. + +*/ + +namespace mongo { + + using namespace bson; + + bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logforrepl); + void incRBID(); + + class rsfatal : public std::exception { + public: + virtual const char* what() const throw(){ return "replica set fatal exception"; } + }; + + struct DocID { + const char *ns; + be _id; + bool operator<(const DocID& d) const { + int c = strcmp(ns, d.ns); + if( c < 0 ) return true; + if( c > 0 ) return false; + return _id < d._id; + } + }; + + struct HowToFixUp { + /* note this is a set -- if there are many $inc's on a single document we need to rollback, we only + need to refetch it once.
*/ + set<DocID> toRefetch; + + /* collections to drop */ + set<string> toDrop; + + set<string> collectionsToResync; + + OpTime commonPoint; + DiskLoc commonPointOurDiskloc; + + int rbid; // remote server's current rollback sequence # + }; + + static void refetch(HowToFixUp& h, const BSONObj& ourObj) { + const char *op = ourObj.getStringField("op"); + if( *op == 'n' ) + return; + + unsigned long long totSize = 0; + totSize += ourObj.objsize(); + if( totSize > 512 * 1024 * 1024 ) + throw "rollback too large"; + + DocID d; + d.ns = ourObj.getStringField("ns"); + if( *d.ns == 0 ) { + log() << "replSet WARNING ignoring op on rollback no ns TODO : " << ourObj.toString() << rsLog; + return; + } + + bo o = ourObj.getObjectField(*op=='u' ? "o2" : "o"); + if( o.isEmpty() ) { + log() << "replSet warning ignoring op on rollback : " << ourObj.toString() << rsLog; + return; + } + + if( *op == 'c' ) { + be first = o.firstElement(); + NamespaceString s(d.ns); // foo.$cmd + string cmdname = first.fieldName(); + Command *cmd = Command::findCommand(cmdname.c_str()); + if( cmd == 0 ) { + log() << "replSet warning rollback no such command " << first.fieldName() << " - different mongod versions perhaps?" << rsLog; + return; + } + else { + /* findandmodify - translated? + godinsert?, + renamecollection a->b. just resync a & b + */ + if( cmdname == "create" ) { + /* Create collection operation + { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } } + */ + string ns = s.db + '.' + o["create"].String(); // -> foo.abc + h.toDrop.insert(ns); + return; + } + else if( cmdname == "drop" ) { + string ns = s.db + '.' + first.valuestr(); + h.collectionsToResync.insert(ns); + return; + } + else if( cmdname == "dropIndexes" || cmdname == "deleteIndexes" ) { + /* TODO: this is bad. we simply do a full resync of the collection here, which could be very slow. */ + log() << "replSet info rollback of dropIndexes is slow in this version of mongod" << rsLog; + string ns = s.db + '.' + first.valuestr(); + h.collectionsToResync.insert(ns); + return; + } + else if( cmdname == "renameCollection" ) { + /* TODO: slow.
*/ + log() << "replSet info rollback of renameCollection is slow in this version of mongod" << rsLog; + string from = first.valuestr(); + string to = o["to"].String(); + h.collectionsToResync.insert(from); + h.collectionsToResync.insert(to); + return; + } + else if( cmdname == "reIndex" ) { + return; + } + else if( cmdname == "dropDatabase" ) { + log() << "replSet error rollback : can't rollback drop database full resync will be required" << rsLog; + log() << "replSet " << o.toString() << rsLog; + throw rsfatal(); + } + else { + log() << "replSet error can't rollback this command yet: " << o.toString() << rsLog; + log() << "replSet cmdname=" << cmdname << rsLog; + throw rsfatal(); + } + } + } + + d._id = o["_id"]; + if( d._id.eoo() ) { + log() << "replSet WARNING ignoring op on rollback no _id TODO : " << d.ns << ' '<< ourObj.toString() << rsLog; + return; + } + + h.toRefetch.insert(d); + } + + int getRBID(DBClientConnection*); + + static void syncRollbackFindCommonPoint(DBClientConnection *them, HowToFixUp& h) { + static time_t last; + if( time(0)-last < 60 ) { + throw "findcommonpoint waiting a while before trying again"; + } + last = time(0); + + assert( dbMutex.atLeastReadLocked() ); + Client::Context c(rsoplog, dbpath, 0, false); + NamespaceDetails *nsd = nsdetails(rsoplog); + assert(nsd); + ReverseCappedCursor u(nsd); + if( !u.ok() ) + throw "our oplog empty or unreadable"; + + const Query q = Query().sort(reverseNaturalObj); + const bo fields = BSON( "ts" << 1 << "h" << 1 ); + + //auto_ptr u = us->query(rsoplog, q, 0, 0, &fields, 0, 0); + + h.rbid = getRBID(them); + auto_ptr t = them->query(rsoplog, q, 0, 0, &fields, 0, 0); + + if( t.get() == 0 || !t->more() ) throw "remote oplog empty or unreadable"; + + BSONObj ourObj = u.current(); + OpTime ourTime = ourObj["ts"]._opTime(); + BSONObj theirObj = t->nextSafe(); + OpTime theirTime = theirObj["ts"]._opTime(); + + { + long long diff = (long long) ourTime.getSecs() - ((long long) theirTime.getSecs()); + /* diff could be positive, negative, or zero */ + log() << "replSet info rollback our last optime: " << ourTime.toStringPretty() << rsLog; + log() << "replSet info rollback their last optime: " << theirTime.toStringPretty() << rsLog; + log() << "replSet info rollback diff in end of log times: " << diff << " seconds" << rsLog; + if( diff > 3600 ) { + log() << "replSet rollback too long a time period for a rollback." << rsLog; + throw "error not willing to roll back more than one hour of data"; + } + } + + unsigned long long scanned = 0; + while( 1 ) { + scanned++; + /* todo add code to assure no excessive scanning for too long */ + if( ourTime == theirTime ) { + if( ourObj["h"].Long() == theirObj["h"].Long() ) { + // found the point back in time where we match. + // todo : check a few more just to be careful about hash collisions. 
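/* Sketch of what "matching" means in this scan (OplogEntryId and sameEntry are
   illustrative names, not part of the patch): an oplog entry is identified by
   its (ts, h) pair, and the common point is the newest entry both oplogs agree on.

       struct OplogEntryId { OpTime ts; long long h; };
       inline bool sameEntry(const OplogEntryId& a, const OplogEntryId& b) {
           return a.ts == b.ts && a.h == b.h;  // modulo the hash-collision caveat above
       }
*/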
+ log() << "replSet rollback found matching events at " << ourTime.toStringPretty() << rsLog; + log() << "replSet rollback findcommonpoint scanned : " << scanned << rsLog; + h.commonPoint = ourTime; + h.commonPointOurDiskloc = u.currLoc(); + return; + } + + refetch(h, ourObj); + + if( !t->more() ) { + log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog; + log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; + log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; + log() << "replSet ourTime: " << ourTime.toStringLong() << rsLog; + throw "RS100 reached beginning of remote oplog [2]"; + } + theirObj = t->nextSafe(); + theirTime = theirObj["ts"]._opTime(); + + u.advance(); + if( !u.ok() ) { + log() << "replSet rollback error RS101 reached beginning of local oplog" << rsLog; + log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; + log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; + log() << "replSet ourTime: " << ourTime.toStringLong() << rsLog; + throw "RS101 reached beginning of local oplog [1]"; + } + ourObj = u.current(); + ourTime = ourObj["ts"]._opTime(); + } + else if( theirTime > ourTime ) { + if( !t->more() ) { + log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog; + log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; + log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; + log() << "replSet ourTime: " << ourTime.toStringLong() << rsLog; + throw "RS100 reached beginning of remote oplog [1]"; + } + theirObj = t->nextSafe(); + theirTime = theirObj["ts"]._opTime(); + } + else { + // theirTime < ourTime + refetch(h, ourObj); + u.advance(); + if( !u.ok() ) { + log() << "replSet rollback error RS101 reached beginning of local oplog" << rsLog; + log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; + log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; + log() << "replSet ourTime: " << ourTime.toStringLong() << rsLog; + throw "RS101 reached beginning of local oplog [2]"; + } + ourObj = u.current(); + ourTime = ourObj["ts"]._opTime(); + } + } + } + + struct X { + const bson::bo *op; + bson::bo goodVersionOfObject; + }; + + static void setMinValid(bo newMinValid) { + try { + log() << "replSet minvalid=" << newMinValid["ts"]._opTime().toStringLong() << rsLog; + } + catch(...) { } + { + Helpers::putSingleton("local.replset.minvalid", newMinValid); + Client::Context cx( "local." ); + cx.db()->flushFiles(true); + } + } + + void ReplSetImpl::syncFixUp(HowToFixUp& h, OplogReader& r) { + DBClientConnection *them = r.conn(); + + // fetch all first so we needn't handle interruption in a fancy way + + unsigned long long totSize = 0; + + list< pair > goodVersions; + + bo newMinValid; + + /* fetch all the goodVersions of each document from current primary */ + DocID d; + unsigned long long n = 0; + try { + for( set::iterator i = h.toRefetch.begin(); i != h.toRefetch.end(); i++ ) { + d = *i; + + assert( !d._id.eoo() ); + + { + /* TODO : slow. lots of round trips. 
*/ + n++; + bo good = them->findOne(d.ns, d._id.wrap()).getOwned(); + totSize += good.objsize(); + uassert( 13410, "replSet too much data to roll back", totSize < 300 * 1024 * 1024 ); + + // note good might be eoo, indicating we should delete it + goodVersions.push_back(pair<DocID,bo>(d,good)); + } + } + newMinValid = r.getLastOp(rsoplog); + if( newMinValid.isEmpty() ) { + sethbmsg("rollback error newMinValid empty?"); + return; + } + } + catch(DBException& e) { + sethbmsg(str::stream() << "rollback re-get objects: " << e.toString(),0); + log() << "rollback couldn't re-get ns:" << d.ns << " _id:" << d._id << ' ' << n << '/' << h.toRefetch.size() << rsLog; + throw e; + } + + MemoryMappedFile::flushAll(true); + + sethbmsg("rollback 3.5"); + if( h.rbid != getRBID(r.conn()) ) { + // our source rolled back itself. so the data we received isn't necessarily consistent. + sethbmsg("rollback rbid on source changed during rollback, cancelling this attempt"); + return; + } + + // update them + sethbmsg(str::stream() << "rollback 4 n:" << goodVersions.size()); + + bool warn = false; + + assert( !h.commonPointOurDiskloc.isNull() ); + + dbMutex.assertWriteLocked(); + + /* we have items we are writing that aren't from a point-in-time. thus best not to come online + until we get to that point in freshness. */ + setMinValid(newMinValid); + + /** any full collection resyncs required? */ + if( !h.collectionsToResync.empty() ) { + for( set<string>::iterator i = h.collectionsToResync.begin(); i != h.collectionsToResync.end(); i++ ) { + string ns = *i; + sethbmsg(str::stream() << "rollback 4.1 coll resync " << ns); + Client::Context c(*i, dbpath, 0, /*doauth*/false); + try { + bob res; + string errmsg; + dropCollection(ns, errmsg, res); + { + dbtemprelease r; + bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, bo(), errmsg, false); + if( !ok ) { + log() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg << rsLog; + throw "rollback error resyncing collection [1]"; + } + } + } + catch(...) { + log() << "replset rollback error resyncing collection " << ns << rsLog; + throw "rollback error resyncing collection [2]"; + } + } + + /* we did more reading from primary, so check it again for a rollback (which would mess us up), and + make minValid newer. + */ + sethbmsg("rollback 4.2"); + { + string err; + try { + newMinValid = r.getLastOp(rsoplog); + if( newMinValid.isEmpty() ) { + err = "can't get minvalid from primary"; + } else { + setMinValid(newMinValid); + } + } + catch(...) { + err = "can't get/set minvalid"; + } + if( h.rbid != getRBID(r.conn()) ) { + // our source rolled back itself. so the data we received isn't necessarily consistent. + // however, we've now done writes. thus we have a problem. + err += "rbid at primary changed during resync/rollback"; + } + if( !err.empty() ) { + log() << "replSet error rolling back : " << err << ". A full resync will be necessary." << rsLog; + /* todo: reset minvalid so that we are permanently in fatal state */ + /* todo: don't be fatal, but rather, get all the data first.
*/ + sethbmsg("rollback error"); + throw rsfatal(); + } + } + sethbmsg("rollback 4.3"); + } + + sethbmsg("rollback 4.6"); + /** drop collections to drop before doing individual fixups - that might make things faster below actually if there were subsequent inserts to rollback */ + for( set::iterator i = h.toDrop.begin(); i != h.toDrop.end(); i++ ) { + Client::Context c(*i, dbpath, 0, /*doauth*/false); + try { + bob res; + string errmsg; + log(1) << "replSet rollback drop: " << *i << rsLog; + dropCollection(*i, errmsg, res); + } + catch(...) { + log() << "replset rollback error dropping collection " << *i << rsLog; + } + } + + sethbmsg("rollback 4.7"); + Client::Context c(rsoplog, dbpath, 0, /*doauth*/false); + NamespaceDetails *oplogDetails = nsdetails(rsoplog); + uassert(13423, str::stream() << "replSet error in rollback can't find " << rsoplog, oplogDetails); + + map > removeSavers; + + unsigned deletes = 0, updates = 0; + for( list >::iterator i = goodVersions.begin(); i != goodVersions.end(); i++ ) { + const DocID& d = i->first; + bo pattern = d._id.wrap(); // { _id : ... } + try { + assert( d.ns && *d.ns ); + if( h.collectionsToResync.count(d.ns) ) { + /* we just synced this entire collection */ + continue; + } + + /* keep an archive of items rolled back */ + shared_ptr& rs = removeSavers[d.ns]; + if ( ! rs ) + rs.reset( new RemoveSaver( "rollback" , "" , d.ns ) ); + + // todo: lots of overhead in context, this can be faster + Client::Context c(d.ns, dbpath, 0, /*doauth*/false); + if( i->second.isEmpty() ) { + // wasn't on the primary; delete. + /* TODO1.6 : can't delete from a capped collection. need to handle that here. */ + deletes++; + + NamespaceDetails *nsd = nsdetails(d.ns); + if( nsd ) { + if( nsd->capped ) { + /* can't delete from a capped collection - so we truncate instead. if this item must go, + so must all successors!!! */ + try { + /** todo: IIRC cappedTrunateAfter does not handle completely empty. todo. */ + // this will crazy slow if no _id index. + long long start = Listener::getElapsedTimeMillis(); + DiskLoc loc = Helpers::findOne(d.ns, pattern, false); + if( Listener::getElapsedTimeMillis() - start > 200 ) + log() << "replSet warning roll back slow no _id index for " << d.ns << " perhaps?" << rsLog; + //would be faster but requires index: DiskLoc loc = Helpers::findById(nsd, pattern); + if( !loc.isNull() ) { + try { + nsd->cappedTruncateAfter(d.ns, loc, true); + } + catch(DBException& e) { + if( e.getCode() == 13415 ) { + // hack: need to just make cappedTruncate do this... + nsd->emptyCappedCollection(d.ns); + } else { + throw; + } + } + } + } + catch(DBException& e) { + log() << "replSet error rolling back capped collection rec " << d.ns << ' ' << e.toString() << rsLog; + } + } + else { + try { + deletes++; + deleteObjects(d.ns, pattern, /*justone*/true, /*logop*/false, /*god*/true, rs.get() ); + } + catch(...) { + log() << "replSet error rollback delete failed ns:" << d.ns << rsLog; + } + } + // did we just empty the collection? if so let's check if it even exists on the source. 
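/* The per-document fix-up rule applied by this loop, restated as a sketch
   (deleteOurCopy and upsertPrimaryVersion are illustrative names only):

       if( goodVersionFromPrimary.isEmpty() )
           deleteOurCopy(ns, idPattern);                       // gone upstream
       else
           upsertPrimaryVersion(ns, idPattern, goodVersionFromPrimary);

   i.e. every refetched _id ends up exactly as it currently exists on the primary.
*/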
+ if( nsd->nrecords == 0 ) { + try { + string sys = cc().database()->name + ".system.namespaces"; + bo o = them->findOne(sys, QUERY("name" << d.ns)); + if( o.isEmpty() ) { + // gone from the source too; drop our (now empty) copy as well + try { + bob res; + string errmsg; + dropCollection(d.ns, errmsg, res); + } + catch(...) { + log() << "replset rollback error dropping collection " << d.ns << rsLog; + } + } + } + catch(DBException&) { + /* this isn't *that* big a deal, but is bad. */ + log() << "replSet warning rollback error querying for existence of " << d.ns << " at the primary, ignoring" << rsLog; + } + } + } + } + else { + // the document exists on the primary; make ours match that version + OpDebug debug; + updates++; + _updateObjects(/*god*/true, d.ns, i->second, pattern, /*upsert=*/true, /*multi=*/false , /*logtheop=*/false , debug, rs.get() ); + } + } + catch(DBException& e) { + log() << "replSet exception in rollback ns:" << d.ns << ' ' << pattern.toString() << ' ' << e.toString() << " ndeletes:" << deletes << rsLog; + warn = true; + } + } + + removeSavers.clear(); // this effectively closes all of them + + sethbmsg(str::stream() << "rollback 5 d:" << deletes << " u:" << updates); + MemoryMappedFile::flushAll(true); + sethbmsg("rollback 6"); + + // clean up oplog + log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; + // todo: fatal error if this throws? + oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); + + /* reset cached lastoptimewritten and h value */ + loadLastOpTimeWritten(); + + sethbmsg("rollback 7"); + MemoryMappedFile::flushAll(true); + + // done + if( warn ) + sethbmsg("issues during syncRollback, see log"); + else + sethbmsg("rollback done"); + } + + void ReplSetImpl::syncRollback(OplogReader&r) { + unsigned s = _syncRollback(r); + if( s ) + sleepsecs(s); + } + + unsigned ReplSetImpl::_syncRollback(OplogReader&r) { + assert( !lockedByMe() ); + assert( !dbMutex.atLeastReadLocked() ); + + sethbmsg("rollback 0"); + + writelocktry lk(rsoplog, 20000); + if( !lk.got() ) { + sethbmsg("rollback couldn't get write lock in a reasonable time"); + return 2; + } + + if( box.getState().secondary() ) { + /* by doing this, we will not service reads (return an error as we aren't in secondary state. + that perhaps is moot because of the write lock above, but that write lock probably gets deferred + or removed or yielded later anyway. + + also, this is better for status reporting - we know what is happening. + */ + box.change(MemberState::RS_ROLLBACK, _self); + } + + HowToFixUp how; + sethbmsg("rollback 1"); + { + r.resetCursor(); + /*DBClientConnection us(false, 0, 0); + string errmsg; + if( !us.connect(HostAndPort::me().toString(),errmsg) ) { + sethbmsg("rollback connect to self failure" + errmsg); + return; + }*/ + + sethbmsg("rollback 2 FindCommonPoint"); + try { + syncRollbackFindCommonPoint(r.conn(), how); + } + catch( const char *p ) { + sethbmsg(string("rollback 2 error ") + p); + return 10; + } + catch( rsfatal& ) { + _fatal(); + return 2; + } + catch( DBException& e ) { + sethbmsg(string("rollback 2 exception ") + e.toString() + "; sleeping 1 min"); + dbtemprelease r; + sleepsecs(60); + throw; + } + } + + sethbmsg("replSet rollback 3 fixup"); + + { + incRBID(); + try { + syncFixUp(how, r); + } + catch( rsfatal& ) { + sethbmsg("rollback fixup error"); + _fatal(); + return 2; + } + catch(...) { + incRBID(); throw; + } + incRBID(); + + /* success - leave "ROLLBACK" state + can go to SECONDARY once minvalid is achieved + */ + box.change(MemberState::RS_RECOVERING, _self); + } + + return 0; + } + +} diff -Nru mongodb-1.4.4/db/repl/rs_sync.cpp mongodb-1.6.3/db/repl/rs_sync.cpp --- mongodb-1.4.4/db/repl/rs_sync.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/rs_sync.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,392 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation.
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "../client.h" +#include "../../client/dbclient.h" +#include "rs.h" +#include "../repl.h" + +namespace mongo { + + using namespace bson; + + extern unsigned replSetForceInitialSyncFailure; + + void startSyncThread() { + Client::initThread("rs_sync"); + cc().iAmSyncThread(); + theReplSet->syncThread(); + cc().shutdown(); + } + + void ReplSetImpl::syncApply(const BSONObj &o) { + //const char *op = o.getStringField("op"); + + char db[MaxDatabaseLen]; + const char *ns = o.getStringField("ns"); + nsToDatabase(ns, db); + + if ( *ns == '.' || *ns == 0 ) { + if( *o.getStringField("op") == 'n' ) + return; + log() << "replSet skipping bad op in oplog: " << o.toString() << endl; + return; + } + + Client::Context ctx(ns); + ctx.getClient()->curop()->reset(); + + /* todo : if this asserts, do we want to ignore or not? */ + applyOperation_inlock(o); + } + + bool ReplSetImpl::initialSyncOplogApplication( + string hn, + const Member *primary, + OpTime applyGTE, + OpTime minValid) + { + if( primary == 0 ) return false; + + OpTime ts; + try { + OplogReader r; + if( !r.connect(hn) ) { + log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; + return false; + } + + r.query(rsoplog, bo()); + assert( r.haveCursor() ); + + /* we lock outside the loop to avoid the overhead of locking on every operation. server isn't usable yet anyway! */ + writelock lk(""); + + { + if( !r.more() ) { + sethbmsg("replSet initial sync error reading remote oplog"); + return false; + } + bo op = r.next(); + OpTime t = op["ts"]._opTime(); + r.putBack(op); + assert( !t.isNull() ); + if( t > applyGTE ) { + sethbmsg(str::stream() << "error " << hn << " oplog wrapped during initial sync"); + return false; + } + } + + // todo : use exhaust + unsigned long long n = 0; + while( 1 ) { + + if( !r.more() ) + break; + BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */ + { + //writelock lk(""); + + ts = o["ts"]._opTime(); + + /* if we have become primary, we don't want to apply things from elsewhere + anymore. assumePrimary is in the db lock so we are safe as long as + we check after we locked above. */ + const Member *p1 = box.getPrimary(); + if( p1 != primary || replSetForceInitialSyncFailure ) { + int f = replSetForceInitialSyncFailure; + if( f > 0 ) { + replSetForceInitialSyncFailure = f-1; + log() << "replSet test code invoked, replSetForceInitialSyncFailure" << rsLog; + } + log() << "replSet primary was:" << primary->fullName() << " now:" << + (p1 != 0 ? p1->fullName() : "none") << rsLog; + throw DBException("primary changed",0); + } + + if( ts >= applyGTE ) { + // optimes before we started copying need not be applied.
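/* The gate below, restated as a sketch (names mirror the surrounding code): the
   clone already reflects any op with ts < applyGTE, so older ops are written to
   our oplog for continuity but not re-applied.

       bool shouldApply = ( ts >= applyGTE );
       if( shouldApply )
           syncApply(o);    // mutate the data
       _logOpObjRS(o);      // always record in our own oplog
*/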
+ syncApply(o); + } + _logOpObjRS(o); /* with repl sets we write the ops to our oplog too */ + } + if( ++n % 100000 == 0 ) { + // simple progress metering + log() << "replSet initialSyncOplogApplication " << n << rsLog; + } + } + } + catch(DBException& e) { + if( ts <= minValid ) { + // didn't make it far enough + log() << "replSet initial sync failing, error applying oplog " << e.toString() << rsLog; + return false; + } + } + return true; + } + + /* should be in RECOVERING state on arrival here. + readlocks + @return true if transitioned to SECONDARY + */ + bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) { + bool golive = false; + { + readlock lk("local.replset.minvalid"); + BSONObj mv; + if( Helpers::getSingleton("local.replset.minvalid", mv) ) { + minvalid = mv["ts"]._opTime(); + if( minvalid <= lastOpTimeWritten ) { + golive=true; + } + } + else + golive = true; /* must have been the original member */ + } + if( golive ) { + sethbmsg(""); + changeState(MemberState::RS_SECONDARY); + } + return golive; + } + + /* tail the primary's oplog. ok to return, will be re-called. */ + void ReplSetImpl::syncTail() { + // todo : locking vis a vis the mgr... + + const Member *primary = box.getPrimary(); + if( primary == 0 ) return; + string hn = primary->h().toString(); + OplogReader r; + if( !r.connect(primary->h().toString()) ) { + log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; + return; + } + + /* first make sure we are not hopelessly out of sync by being very stale. */ + { + BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); + OpTime ts = remoteOldestOp["ts"]._opTime(); + DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + DEV { + // debugging sync1.js... + log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog; + log() << "replSet our state: " << state().toString() << rsLog; + } + if( lastOpTimeWritten < ts ) { + log() << "replSet error RS102 too stale to catch up, at least from primary: " << hn << rsLog; + log() << "replSet our last optime : " << lastOpTimeWritten.toStringLong() << rsLog; + log() << "replSet oldest at " << hn << " : " << ts.toStringLong() << rsLog; + log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog; + sethbmsg("error RS102 too stale to catch up"); + sleepsecs(120); + return; + } + } + + r.tailingQueryGTE(rsoplog, lastOpTimeWritten); + assert( r.haveCursor() ); + assert( r.awaitCapable() ); + + { + if( !r.more() ) { + /* maybe we are ahead and need to roll back? */ + try { + bo theirLastOp = r.getLastOp(rsoplog); + if( theirLastOp.isEmpty() ) { + log() << "replSet error empty query result from " << hn << " oplog" << rsLog; + sleepsecs(2); + return; + } + OpTime theirTS = theirLastOp["ts"]._opTime(); + if( theirTS < lastOpTimeWritten ) { + log() << "replSet we are ahead of the primary, will try to roll back" << rsLog; + syncRollback(r); + return; + } + /* we're not ahead? maybe our new query got fresher data. 
best to come back and try again */ + log() << "replSet syncTail condition 1" << rsLog; + sleepsecs(1); + } + catch(DBException& e) { + log() << "replSet error querying " << hn << ' ' << e.toString() << rsLog; + sleepsecs(2); + } + return; + /* + log() << "replSet syncTail error querying oplog >= " << lastOpTimeWritten.toString() << " from " << hn << rsLog; + try { + log() << "replSet " << hn << " last op: " << r.getLastOp(rsoplog).toString() << rsLog; + } + catch(...) { } + sleepsecs(1); + return;*/ + } + + BSONObj o = r.nextSafe(); + OpTime ts = o["ts"]._opTime(); + long long h = o["h"].numberLong(); + if( ts != lastOpTimeWritten || h != lastH ) { + log(1) << "TEMP our last op time written: " << lastOpTimeWritten.toStringPretty() << endl; + log(1) << "TEMP primary's GTE: " << ts.toStringPretty() << endl; + /* + }*/ + + syncRollback(r); + return; + } + } + + /* we have now checked if we need to rollback and we either don't have to or did it. */ + { + OpTime minvalid; + tryToGoLiveAsASecondary(minvalid); + } + + while( 1 ) { + while( 1 ) { + if( !r.moreInCurrentBatch() ) { + /* we need to occasionally check some things. between + batches is probably a good time. */ + + /* perhaps we should check this earlier? but not before the rollback checks. */ + if( state().recovering() ) { + /* can we go to RS_SECONDARY state? we can if not too old and if minvalid achieved */ + OpTime minvalid; + bool golive = ReplSetImpl::tryToGoLiveAsASecondary(minvalid); + if( golive ) { + ; + } + else { + sethbmsg(str::stream() << "still syncing, not yet to minValid optime" << minvalid.toString()); + } + + /* todo: too stale capability */ + } + + if( box.getPrimary() != primary ) + return; + } + if( !r.more() ) + break; + { + BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */ + { + writelock lk(""); + + /* if we have become primary, we don't want to apply things from elsewhere + anymore. assumePrimary is in the db lock so we are safe as long as + we check after we locked above. */ + if( box.getPrimary() != primary ) { + if( box.getState().primary() ) + log(0) << "replSet stopping syncTail we are now primary" << rsLog; + return; + } + + syncApply(o); + _logOpObjRS(o); /* with repl sets we write the ops to our oplog too: */ + } + int sd = myConfig().slaveDelay; + if( sd ) { + const OpTime ts = o["ts"]._opTime(); + long long a = ts.getSecs(); + long long b = time(0); + long long lag = b - a; + long long sleeptime = sd - lag; + if( sleeptime > 0 ) { + uassert(12000, "rs slaveDelay differential too big check clocks and systems", sleeptime < 0x40000000); + log() << "replSet temp slavedelay sleep:" << sleeptime << rsLog; + if( sleeptime < 60 ) { + sleepsecs((int) sleeptime); + } + else { + // sleep(hours) would prevent reconfigs from taking effect & such! + long long waitUntil = b + sleeptime; + while( 1 ) { + sleepsecs(6); + if( time(0) >= waitUntil ) + break; + if( box.getPrimary() != primary ) + break; + if( myConfig().slaveDelay != sd ) // reconf + break; + } + } + } + } + } + } + r.tailCheck(); + if( !r.haveCursor() ) { + log(1) << "replSet end syncTail pass with " << hn << rsLog; + // TODO : reuse our connection to the primary. + return; + } + if( box.getPrimary() != primary ) + return; + // looping back is ok because this is a tailable cursor + } + } + + void ReplSetImpl::_syncThread() { + StateBox::SP sp = box.get(); + if( sp.state.primary() ) { + sleepsecs(1); + return; + } + if( sp.state.fatal() ) { + sleepsecs(5); + return; + } + + /* later, we can sync from up secondaries if we want.
tbd. */ + if( sp.primary == 0 ) + return; + + /* do we have anything at all? */ + if( lastOpTimeWritten.isNull() ) { + syncDoInitialSync(); + return; // _syncThread will be recalled, starts from top again in case sync failed. + } + + /* we have some data. continue tailing. */ + syncTail(); + } + + void ReplSetImpl::syncThread() { + if( myConfig().arbiterOnly ) + return; + while( 1 ) { + try { + _syncThread(); + } + catch(DBException& e) { + sethbmsg("syncThread: " + e.toString()); + sleepsecs(10); + } + catch(...) { + sethbmsg("unexpected exception in syncThread()"); + // TODO : SET NOT SECONDARY here. + sleepsecs(60); + } + sleepsecs(1); + } + } + +} diff -Nru mongodb-1.4.4/db/repl/test.html mongodb-1.6.3/db/repl/test.html --- mongodb-1.4.4/db/repl/test.html 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/test.html 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,11 @@ + + + + + + + + + diff -Nru mongodb-1.4.4/db/repl/testing.js mongodb-1.6.3/db/repl/testing.js --- mongodb-1.4.4/db/repl/testing.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl/testing.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,42 @@ +// helpers for testing repl sets +// run +// mongo --shell testing.js + +cfg = { + _id: 'asdf', + members: [ + { _id : 0, host : "dm_hp" }, + { _id : 2, host : "dm_hp:27002" } + ] +}; +c2 = { + _id: 'asdf', + members: [ + { _id: 0, host: "dmthink" }, + { _id: 2, host: "dmthink:27002" } + ] +}; + +db = db.getSisterDB("admin"); +local = db.getSisterDB("local"); + +print("\n\ndb = admin db on localhost:27017"); +print("b = admin on localhost:27002"); +print("rc(x) = db.runCommand(x)"); +print("cfg = samp replset config"); +print("i() = replSetInitiate(cfg)"); +print("ism() = rc('ismaster')"); +print("\n\n"); + +function rc(c) { return db.runCommand(c); } +function i() { return rc({ replSetInitiate: cfg }); } +function ism() { return rc("isMaster"); } + +b = 0; +try { + b = new Mongo("localhost:27002").getDB("admin"); +} +catch (e) { + print("\nCouldn't connect to b mongod instance\n"); +} + diff -Nru mongodb-1.4.4/db/repl_block.cpp mongodb-1.6.3/db/repl_block.cpp --- mongodb-1.4.4/db/repl_block.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl_block.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,207 @@ +// repl_block.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#include "pch.h" +#include "repl.h" +#include "repl_block.h" +#include "instance.h" +#include "dbhelpers.h" +#include "../util/background.h" +#include "../util/mongoutils/str.h" +#include "../client/dbclient.h" +#include "replpair.h" + +//#define REPLDEBUG(x) log() << "replBlock: " << x << endl; +#define REPLDEBUG(x) + +namespace mongo { + + using namespace mongoutils; + + class SlaveTracking : public BackgroundJob { + public: + string name() { return "SlaveTracking"; } + + static const char * NS; + + struct Ident { + + Ident(BSONObj r,string h,string n){ + BSONObjBuilder b; + b.appendElements( r ); + b.append( "host" , h ); + b.append( "ns" , n ); + obj = b.obj(); + } + + bool operator<( const Ident& other ) const { + return obj.woCompare( other.obj ) < 0; + } + + BSONObj obj; + }; + + struct Info { + Info() : loc(0){} + ~Info(){ + if ( loc && owned ){ + delete loc; + } + } + bool owned; + OpTime * loc; + }; + + SlaveTracking() : _mutex("SlaveTracking") { + _dirty = false; + _started = false; + } + + void run(){ + Client::initThread( "slaveTracking" ); + DBDirectClient db; + while ( ! inShutdown() ){ + sleepsecs( 1 ); + + if ( ! _dirty ) + continue; + + writelock lk(NS); + + list< pair > todo; + + { + scoped_lock mylk(_mutex); + + for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++ ){ + BSONObjBuilder temp; + temp.appendTimestamp( "syncedTo" , i->second.loc[0].asDate() ); + todo.push_back( pair( i->first.obj.getOwned() , + BSON( "$set" << temp.obj() ).getOwned() ) ); + } + + _slaves.clear(); + } + + for ( list< pair >::iterator i=todo.begin(); i!=todo.end(); i++ ){ + db.update( NS , i->first , i->second , true ); + } + + _dirty = false; + } + } + + void reset(){ + scoped_lock mylk(_mutex); + _slaves.clear(); + } + + void update( const BSONObj& rid , const string& host , const string& ns , OpTime last ){ + REPLDEBUG( host << " " << rid << " " << ns << " " << last ); + + scoped_lock mylk(_mutex); + +#ifdef _DEBUG + MongoFileAllowWrites allowWrites; +#endif + + Ident ident(rid,host,ns); + Info& i = _slaves[ ident ]; + if ( i.loc ){ + i.loc[0] = last; + return; + } + + dbMutex.assertAtLeastReadLocked(); + + BSONObj res; + if ( Helpers::findOne( NS , ident.obj , res ) ){ + assert( res["syncedTo"].type() ); + i.owned = false; + i.loc = (OpTime*)res["syncedTo"].value(); + i.loc[0] = last; + return; + } + + i.owned = true; + i.loc = new OpTime[1]; + i.loc[0] = last; + _dirty = true; + + if ( ! _started ){ + // start background thread here since we definitely need it + _started = true; + go(); + } + + } + + bool opReplicatedEnough( OpTime op , int w ){ + RARELY { + REPLDEBUG( "looking for : " << op << " w=" << w ); + } + + if ( w <= 1 || ! 
_isMaster() ) + return true; + + w--; // now this is the # of slaves i need + scoped_lock mylk(_mutex); + for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++){ + OpTime s = *(i->second.loc); + if ( s < op ){ + continue; + } + if ( --w == 0 ) + return true; + } + return w <= 0; + } + + // need to be careful not to deadlock with this + mongo::mutex _mutex; + map _slaves; + bool _dirty; + bool _started; + + } slaveTracking; + + const char * SlaveTracking::NS = "local.slaves"; + + void updateSlaveLocation( CurOp& curop, const char * ns , OpTime lastOp ){ + if ( lastOp.isNull() ) + return; + + assert( str::startsWith(ns, "local.oplog.") ); + + Client * c = curop.getClient(); + assert(c); + BSONObj rid = c->getRemoteID(); + if ( rid.isEmpty() ) + return; + + slaveTracking.update( rid , curop.getRemoteString( false ) , ns , lastOp ); + } + + bool opReplicatedEnough( OpTime op , int w ){ + return slaveTracking.opReplicatedEnough( op , w ); + } + + void resetSlaveCache(){ + slaveTracking.reset(); + } +} diff -Nru mongodb-1.4.4/db/repl_block.h mongodb-1.6.3/db/repl_block.h --- mongodb-1.4.4/db/repl_block.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/repl_block.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,34 @@ +// repl_block.h - blocking on writes for replication + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../pch.h" +#include "client.h" +#include "curop.h" + +/** + local.slaves - current location for all slaves + + */ +namespace mongo { + + void updateSlaveLocation( CurOp& curop, const char * ns , OpTime lastOp ); + bool opReplicatedEnough( OpTime op , int w ); + void resetSlaveCache(); +} diff -Nru mongodb-1.4.4/db/repl.cpp mongodb-1.6.3/db/repl.cpp --- mongodb-1.4.4/db/repl.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/repl.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -31,11 +31,12 @@ local.pair.sync - { initialsynccomplete: 1 } */ -#include "stdafx.h" +#include "pch.h" #include "jsobj.h" #include "../util/goodies.h" #include "repl.h" #include "../util/message.h" +#include "../util/background.h" #include "../client/dbclient.h" #include "../client/connpool.h" #include "pdfile.h" @@ -44,16 +45,17 @@ #include "commands.h" #include "security.h" #include "cmdline.h" +#include "repl_block.h" +#include "repl/rs.h" namespace mongo { // our config from command line etc. ReplSettings replSettings; - void ensureHaveIdIndex(const char *ns); - /* if 1 sync() is running */ - int syncing = 0; + volatile int syncing = 0; + static volatile int relinquishSyncingSome = 0; /* if true replace our peer in a replication pair -- don't worry about if his local.oplog.$main is empty. 
@@ -69,11 +71,9 @@ IdTracker &idTracker = *( new IdTracker() ); - int __findingStartInitialTimeout = 5; // configurable for testing - } // namespace mongo -#include "replset.h" +#include "replpair.h" namespace mongo { @@ -102,7 +102,7 @@ return; info = _comment; if ( n != state && !cmdLine.quiet ) - log() << "pair: setting master=" << n << " was " << state << endl; + tlog() << "pair: setting master=" << n << " was " << state << endl; state = n; } @@ -119,7 +119,7 @@ auto_ptr conn( newClientConnection() ); string errmsg; if ( !conn->connect(arbHost.c_str(), errmsg) ) { - log() << "repl: cantconn arbiter " << errmsg << endl; + tlog() << "repl: cantconn arbiter " << errmsg << endl; setMasterLocked(State_CantArb, "can't connect to arb"); return; } @@ -131,18 +131,16 @@ class CmdReplacePeer : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { - return false; - } - virtual LockType locktype(){ return WRITE; } - CmdReplacePeer() : Command("replacepeer") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual LockType locktype() const { return WRITE; } + void help(stringstream&h) const { h << "replace a node in a replica pair"; } + CmdReplacePeer() : Command("replacePeer", false, "replacepeer") { } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( replPair == 0 ) { errmsg = "not paired"; return false; @@ -157,11 +155,12 @@ } Timer t; while ( 1 ) { - if ( syncing == 0 || t.millis() > 20000 ) + if ( syncing == 0 || t.millis() > 30000 ) break; { dbtemprelease t; - sleepmillis(10); + relinquishSyncingSome = 1; + sleepmillis(1); } } if ( syncing ) { @@ -191,18 +190,16 @@ class CmdForceDead : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { - return false; - } - virtual LockType locktype(){ return WRITE; } + virtual void help(stringstream& h) const { h << "internal"; } + virtual LockType locktype() const { return WRITE; } CmdForceDead() : Command("forcedead") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { replAllDead = "replication forced to stop by 'forcedead' command"; log() << "*********************************************************\n"; log() << "received 'forcedead' command, replication forced to stop" << endl; @@ -213,18 +210,17 @@ /* operator requested resynchronization of replication (on the slave). 
{ resync : 1 } */ class CmdResync : public Command { public: - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { - return false; - } - virtual LockType locktype(){ return WRITE; } + virtual bool logTheOp() { return false; } + virtual LockType locktype() const { return WRITE; } + void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; } CmdResync() : Command("resync") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.getBoolField( "force" ) ) { if ( !waitForSyncToFinish( errmsg ) ) return false; @@ -246,11 +242,12 @@ // reloaded with new saved state on next pass. Timer t; while ( 1 ) { - if ( syncing == 0 || t.millis() > 20000 ) + if ( syncing == 0 || t.millis() > 30000 ) break; { dbtemprelease t; - sleepmillis(10); + relinquishSyncingSome = 1; + sleepmillis(1); } } if ( syncing ) { @@ -268,7 +265,7 @@ void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level ){ if ( replAllDead ) { - result.append("ismaster", 0.0); + result.append("ismaster", 0); if( authed ) { if ( replPair ) result.append("remote", replPair->remote); @@ -281,20 +278,22 @@ if( authed ) { result.append("remote", replPair->remote); if ( !replPair->info.empty() ) - result.append("info", replPair->info); + result.append("info", replPair->info.toString()); } } else { - result.append("ismaster", replSettings.slave ? 0 : 1); - result.append("msg", "not paired"); + result.appendBool("ismaster", _isMaster() ); } - - if ( level ){ + + if ( level && replSet ){ + result.append( "info" , "is replica set" ); + } + else if ( level ){ BSONObjBuilder sources( result.subarrayStart( "sources" ) ); readlock lk( "local.sources" ); Client::Context ctx( "local.sources" ); - auto_ptr c = findTableScan("local.sources", BSONObj()); + shared_ptr c = findTableScan("local.sources", BSONObj()); int n = 0; while ( c->ok() ){ BSONObj s = c->current(); @@ -336,17 +335,33 @@ class CmdIsMaster : public Command { public: virtual bool requiresAuth() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return NONE; } - CmdIsMaster() : Command("ismaster") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + virtual void help( stringstream &help ) const { + help << "Check if this server is primary for a replica pair/set; also if it is --master or --slave in simple master/slave setups.\n"; + help << "{ isMaster : 1 }"; + } + virtual LockType locktype() const { return NONE; } + CmdIsMaster() : Command("isMaster", true, "ismaster") { } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not authenticated. we allow unauthenticated ismaster but we aren't as verbose informationally if one is not authenticated for admin db to be safe. 
*/ + + if( replSet ) { + if( theReplSet == 0 ) { + result.append("ismaster", false); + result.append("secondary", false); + errmsg = "replSet still trying to initialize"; + result.append("info", ReplSet::startupStatusMsg); + return true; + } + theReplSet->fillIsMaster(result); + return true; + } bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); appendReplicationInfo( result , authed ); @@ -357,12 +372,12 @@ class CmdIsInitialSyncComplete : public Command { public: virtual bool requiresAuth() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdIsInitialSyncComplete() : Command( "isinitialsynccomplete" ) {} - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { result.appendBool( "initialsynccomplete", getInitialSyncCompleted() ); return true; } @@ -388,14 +403,14 @@ class CmdNegotiateMaster : public Command { public: CmdNegotiateMaster() : Command("negotiatemaster") { } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } - virtual LockType locktype(){ return WRITE; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual LockType locktype() const { return WRITE; } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { if ( replPair == 0 ) { massert( 10383 , "Another mongod instance believes incorrectly that this node is its peer", !cmdObj.getBoolField( "fromArbiter" ) ); // assume that we are an arbiter and should forward the request @@ -430,7 +445,7 @@ if ( e.fieldName() != string( "ok" ) ) result.append( e ); } - return ( ret.getIntField("ok") == 1 ); + return ret["ok"].trueValue(); } int was = cmdObj.getIntField("i_was"); @@ -476,7 +491,7 @@ b.append("your_port", remotePort); BSONObj cmd = b.done(); BSONObj res = conn->findOne("admin.$cmd", cmd); - if ( res.getIntField("ok") != 1 ) { + if ( ! res["ok"].trueValue() ){ string message = method + " negotiate failed"; problem() << message << ": " << res.toString() << '\n'; setMasterLocked(State_Confused, message.c_str()); @@ -495,20 +510,6 @@ return remote; } - struct TestOpTime { - TestOpTime() { - OpTime t; - for ( int i = 0; i < 10; i++ ) { - OpTime s = OpTime::now(); - assert( s != t ); - t = s; - } - OpTime q = t; - assert( q == t ); - assert( !(q != t) ); - } - } testoptime; - /* --------------------------------------------------------------*/ ReplSource::ReplSource() { @@ -573,7 +574,7 @@ int n = 0; for ( set::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++ ) { n++; - dbsNextPassBuilder.appendBool(i->c_str(), 1); + dbsNextPassBuilder.appendBool(*i, 1); } if ( n ) b.append("dbsNextPass", dbsNextPassBuilder.done()); @@ -582,7 +583,7 @@ n = 0; for ( set::iterator i = incompleteCloneDbs.begin(); i != incompleteCloneDbs.end(); i++ ) { n++; - incompleteCloneDbsBuilder.appendBool(i->c_str(), 1); + incompleteCloneDbsBuilder.appendBool(*i, 1); } if ( n ) b.append("incompleteCloneDbs", incompleteCloneDbsBuilder.done()); @@ -650,7 +651,7 @@ // --source specified. 
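On the two result checks loosened above (in negotiate() and CmdNegotiateMaster): a command reply may encode the ok field as an int, a double, or a bool, and only trueValue() accepts all three. A minimal standalone sketch:

    BSONObj a = BSON( "ok" << 1 );     // NumberInt
    BSONObj b = BSON( "ok" << 1.0 );   // NumberDouble -- the usual encoding
    BSONObj c = BSON( "ok" << true );  // Bool
    assert( a["ok"].trueValue() && b["ok"].trueValue() && c["ok"].trueValue() );
    // getIntField("ok") == 1 holds for the numeric cases but not for the
    // boolean one, since getIntField() returns INT_MIN for non-numeric types.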
// check that no items are in sources other than that // add if missing - auto_ptr c = findTableScan("local.sources", BSONObj()); + shared_ptr c = findTableScan("local.sources", BSONObj()); int n = 0; while ( c->ok() ) { n++; @@ -694,7 +695,7 @@ } // check that no items are in sources other than that // add if missing - auto_ptr c = findTableScan("local.sources", BSONObj()); + shared_ptr c = findTableScan("local.sources", BSONObj()); int n = 0; while ( c->ok() ) { n++; @@ -716,7 +717,7 @@ } } - auto_ptr c = findTableScan("local.sources", BSONObj()); + shared_ptr c = findTableScan("local.sources", BSONObj()); while ( c->ok() ) { ReplSource tmp(c->current()); if ( replPair && tmp.hostName == replPair->remote && tmp.sourceName() == "main" ) { @@ -779,8 +780,9 @@ BSONObj info; { dbtemprelease t; - connect(); - bool ok = conn->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); + oplogReader.connect(hostName); + /* todo use getDatabaseNames() method here */ + bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); massert( 10385 , "Unable to get database list", ok ); } BSONObjIterator i( info.getField( "databases" ).embeddedObject() ); @@ -804,11 +806,9 @@ string ReplSource::resyncDrop( const char *db, const char *requester ) { log() << "resync: dropping database " << db << endl; - string dummyns = string( db ) + "."; - Client::Context ctx(dummyns); - assert( cc().database()->name == db ); - dropDatabase(dummyns.c_str()); - return dummyns; + Client::Context ctx(db); + dropDatabase(db); + return db; } /* grab initial copy of a database from the master */ @@ -832,61 +832,8 @@ } void ReplSource::applyOperation(const BSONObj& op) { - log( 6 ) << "applying op: " << op << endl; - OpDebug debug; - BSONObj o = op.getObjectField("o"); - const char *ns = op.getStringField("ns"); - // operation type -- see logOp() comments for types - const char *opType = op.getStringField("op"); try { - if ( *opType == 'i' ) { - const char *p = strchr(ns, '.'); - if ( p && strcmp(p, ".system.indexes") == 0 ) { - // updates aren't allowed for indexes -- so we will do a regular insert. if index already - // exists, that is ok. - theDataFileMgr.insert(ns, (void*) o.objdata(), o.objsize()); - } - else { - // do upserts for inserts as we might get replayed more than once - BSONElement _id; - if( !o.getObjectID(_id) ) { - /* No _id. This will be very slow. 
*/ - Timer t; - updateObjects(ns, o, o, true, false, false , debug ); - if( t.millis() >= 2 ) { - RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl; - } - } - else { - BSONObjBuilder b; - b.append(_id); - - /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */ - RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow - - updateObjects(ns, o, b.done(), true, false, false , debug ); - } - } - } - else if ( *opType == 'u' ) { - RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow - updateObjects(ns, o, op.getObjectField("o2"), op.getBoolField("b"), false, false , debug ); - } - else if ( *opType == 'd' ) { - if ( opType[1] == 0 ) - deleteObjects(ns, o, op.getBoolField("b")); - else - assert( opType[1] == 'b' ); // "db" advertisement - } - else if ( *opType == 'n' ) { - // no op - } - else { - BufBuilder bb; - BSONObjBuilder ob; - assert( *opType == 'c' ); - _runCommands(ns, o, bb, ob, true, 0); - } + applyOperation_inlock( op ); } catch ( UserException& e ) { log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;; @@ -894,19 +841,21 @@ catch ( DBException& e ) { log() << "sync: caught db exception " << e << " while applying op: " << op << endl;; } + } - + /* local.$oplog.main is of the form: { ts: ..., op: , ns: ..., o: , o2: , b: } ... see logOp() comments. */ void ReplSource::sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail) { - log( 6 ) << "processing op: " << op << endl; - // skip no-op - if ( op.getStringField( "op" )[ 0 ] == 'n' ) + if( logLevel >= 6 ) // op.tostring is expensive so doing this check explicitly + log(6) << "processing op: " << op << endl; + + if( op.getStringField("op")[0] == 'n' ) return; - + char clientName[MaxDatabaseLen]; const char *ns = op.getStringField("ns"); nsToDatabase(ns, clientName); @@ -924,6 +873,42 @@ if ( !only.empty() && only != clientName ) return; + if( cmdLine.pretouch ) { + if( cmdLine.pretouch > 1 ) { + /* note: this is bad - should be put in ReplSource. but this is first test... */ + static int countdown; + if( countdown > 0 ) { + countdown--; // was pretouched on a prev pass + assert( countdown >= 0 ); + } else { + const int m = 4; + if( tp.get() == 0 ) { + int nthr = min(8, cmdLine.pretouch); + nthr = max(nthr, 1); + tp.reset( new ThreadPool(nthr) ); + } + vector v; + oplogReader.peek(v, cmdLine.pretouch); + unsigned a = 0; + while( 1 ) { + if( a >= v.size() ) break; + unsigned b = a + m - 1; // v[a..b] + if( b >= v.size() ) b = v.size() - 1; + tp->schedule(pretouchN, v, a, b); + DEV cout << "pretouch task: " << a << ".." << b << endl; + a += m; + } + // we do one too... 
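+                    // (sketch of the batching above, for orientation: with m = 4
+                    // the peeked ops are scheduled as v[0..3], v[4..7], ... on up
+                    // to 8 pool threads; the countdown set below lets the next
+                    // v.size() passes through here skip pretouching, because those
+                    // ops have already been faulted into memory on this pass)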
+ pretouchOperation(op); + tp->join(); + countdown = v.size(); + } + } + else { + pretouchOperation(op); + } + } + dblock lk; if ( localLogTail && replPair && replPair->state == ReplPair::State_Master ) { @@ -943,7 +928,8 @@ bool empty = ctx.db()->isEmpty(); bool incompleteClone = incompleteCloneDbs.count( clientName ) != 0; - log( 6 ) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl; + if( logLevel >= 6 ) + log(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl; // always apply admin command command // this is a bit hacky -- the semantics of replication/commands aren't well specified @@ -1050,17 +1036,17 @@ if ( !only.empty() ) { b.appendRegex("ns", string("^") + only); } - BSONObj last = conn->findOne( _ns.c_str(), Query( b.done() ).sort( BSON( "$natural" << -1 ) ) ); + BSONObj last = oplogReader.findOne( _ns.c_str(), Query( b.done() ).sort( BSON( "$natural" << -1 ) ) ); if ( !last.isEmpty() ) { BSONElement ts = last.getField( "ts" ); - massert( 10386 , (string)"non Date ts found:" + last.jsonString() , ts.type() == Date || ts.type() == Timestamp ); + massert( 10386 , "non Date ts found: " + last.toString(), ts.type() == Date || ts.type() == Timestamp ); syncedTo = OpTime( ts.date() ); } } OpTime ReplSource::nextLastSavedLocalTs() const { Client::Context ctx( "local.oplog.$main" ); - auto_ptr< Cursor > c = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); + shared_ptr c = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); if ( c->ok() ) return OpTime( c->current().getField( "ts" ).date() ); return OpTime(); @@ -1076,19 +1062,19 @@ log() << "Sending forcedead command to slave to stop its replication\n"; log() << "Host: " << hostName << " paired: " << paired << endl; massert( 10387 , "request to kill slave replication failed", - conn->simpleCommand( "admin", 0, "forcedead" ) ); + oplogReader.conn()->simpleCommand( "admin", 0, "forcedead" ) ); syncToTailOfRemoteLog(); { dblock lk; setLastSavedLocalTs( nextLastSavedLocalTs() ); save(); - cursor.reset(); + oplogReader.resetCursor(); } } bool ReplSource::updateSetsWithLocalOps( OpTime &localLogTail, bool mayUnlock ) { Client::Context ctx( "local.oplog.$main" ); - auto_ptr< Cursor > localLog = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); + shared_ptr localLog = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); OpTime newTail; for( ; localLog->ok(); localLog->advance() ) { BSONObj op = localLog->current(); @@ -1119,17 +1105,17 @@ /* slave: pull some data from the master's oplog note: not yet in db mutex at this point. + @return -1 error + 0 ok, don't sleep + 1 ok, sleep */ - bool ReplSource::sync_pullOpLog(int& nApplied) { + int ReplSource::sync_pullOpLog(int& nApplied) { + int okResultCode = 1; string ns = string("local.oplog.$") + sourceName(); log(2) << "repl: sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n'; bool tailing = true; - DBClientCursor *c = cursor.get(); - if ( c && c->isDead() ) { - log() << "repl: old cursor isDead, initiating a new one\n"; - c = 0; - } + oplogReader.tailCheck(); if ( replPair && replPair->state == ReplPair::State_Master ) { dblock lk; @@ -1139,12 +1125,12 @@ bool initial = syncedTo.isNull(); - if ( c == 0 || initial ) { + if ( !oplogReader.haveCursor() || initial ) { if ( initial ) { // Important to grab last oplog timestamp before listing databases. 
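+                // (why this order matters: with the timestamp taken first, anything
+                // happening afterwards is re-applied from the oplog, which is safe
+                // since inserts are replayed as upserts; if the listing came first,
+                // a database created between the two steps would be missing from
+                // both the clone list and the replayed oplog range)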
syncToTailOfRemoteLog(); BSONObj info; - bool ok = conn->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); + bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); massert( 10389 , "Unable to get database list", ok ); BSONObjIterator i( info.getField( "databases" ).embeddedObject() ); while( i.moreWithEOO() ) { @@ -1171,27 +1157,22 @@ query.append("ts", q.done()); if ( !only.empty() ) { // note we may here skip a LOT of data table scanning, a lot of work for the master. - query.appendRegex("ns", string("^") + only); + query.appendRegex("ns", string("^") + only); // maybe append "\\." here? } BSONObj queryObj = query.done(); - // queryObj = { ts: { $gte: syncedTo } } + // e.g. queryObj = { ts: { $gte: syncedTo } } - log(2) << "repl: " << ns << ".find(" << queryObj.toString() << ')' << '\n'; - cursor = conn->query( ns.c_str(), queryObj, 0, 0, 0, - QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | - QueryOption_AwaitData - ); - c = cursor.get(); + oplogReader.tailingQuery(ns.c_str(), queryObj); tailing = false; } else { log(2) << "repl: tailing=true\n"; } - if ( c == 0 ) { - problem() << "repl: dbclient::query returns null (conn closed?)" << endl; - resetConnection(); - return false; + if( !oplogReader.haveCursor() ) { + problem() << "repl: dbclient::query returns null (conn closed?)" << endl; + oplogReader.resetConnection(); + return -1; } // show any deferred database creates from a previous pass @@ -1206,9 +1187,12 @@ } } - if ( !c->more() ) { + if ( !oplogReader.more() ) { if ( tailing ) { log(2) << "repl: tailing & no new activity\n"; + if( oplogReader.awaitCapable() ) + okResultCode = 0; // don't sleep + } else { log() << "repl: " << ns << " oplog is empty\n"; } @@ -1217,24 +1201,31 @@ OpTime nextLastSaved = nextLastSavedLocalTs(); { dbtemprelease t; - if ( !c->more() ) { + if ( !oplogReader.more() ) { setLastSavedLocalTs( nextLastSaved ); } } save(); } - return true; + return okResultCode; } OpTime nextOpTime; { - BSONObj op = c->next(); + BSONObj op = oplogReader.next(); BSONElement ts = op.getField("ts"); if ( ts.type() != Date && ts.type() != Timestamp ) { string err = op.getStringField("$err"); if ( !err.empty() ) { - problem() << "repl: $err reading remote oplog: " + err << '\n'; - massert( 10390 , "got $err reading remote oplog", false ); + // 13051 is "tailable cursor requested on non capped collection" + if (op.getIntField("code") == 13051) { + problem() << "trying to slave off of a non-master" << '\n'; + massert( 13344 , "trying to slave off of a non-master", false ); + } + else { + problem() << "repl: $err reading remote oplog: " + err << '\n'; + massert( 10390 , "got $err reading remote oplog", false ); + } } else { problem() << "repl: bad object read from remote oplog: " << op.toString() << '\n'; @@ -1248,7 +1239,7 @@ if ( !tailing && !initial && next != syncedTo ) { log() << "remote slave log filled, forcing slave resync" << endl; resetSlave(); - return true; + return 1; } dblock lk; @@ -1260,9 +1251,15 @@ if ( tailing || initial ) { if ( initial ) log(1) << "repl: initial run\n"; - else - assert( syncedTo < nextOpTime ); - c->putBack( op ); // op will be processed in the loop below + else { + if( !( syncedTo <= nextOpTime ) ) { + log() << "repl ASSERTION failed : syncedTo <= nextOpTime" << endl; + log() << "repl syncTo: " << syncedTo.toStringLong() << endl; + log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl; + assert(false); + } + } + oplogReader.putBack( op ); // op will be processed 
in the loop below nextOpTime = OpTime(); // will reread the op below } else if ( nextOpTime != syncedTo ) { // didn't get what we queried for - error @@ -1305,17 +1302,24 @@ 1) find most recent op in local log 2) more()? */ - if ( !c->more() ) { + + bool moreInitialSyncsPending = !addDbNextPass.empty() && n; // we need "&& n" to assure we actually process at least one op to get a sync point recorded in the first place. + + if ( moreInitialSyncsPending || !oplogReader.more() ) { dblock lk; OpTime nextLastSaved = nextLastSavedLocalTs(); { dbtemprelease t; - if ( c->more() ) { - continue; + if ( !moreInitialSyncsPending && oplogReader.more() ) { + if ( getInitialSyncCompleted() ) { // if initial sync hasn't completed, break out of loop so we can set to completed or clone more dbs + continue; + } } else { setLastSavedLocalTs( nextLastSaved ); } } + if( oplogReader.awaitCapable() && tailing ) + okResultCode = 0; // don't sleep syncedTo = nextOpTime; save(); // note how far we are synced up to now log() << "repl: applied " << n << " operations" << endl; @@ -1323,6 +1327,8 @@ log() << "repl: end sync_pullOpLog syncedTo: " << syncedTo.toStringLong() << endl; break; } + else { + } OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) { // periodically note our progress, in case we are doing a lot of work and crash @@ -1336,7 +1342,7 @@ n = 0; } - BSONObj op = c->next(); + BSONObj op = oplogReader.next(); BSONElement ts = op.getField("ts"); if( !( ts.type() == Date || ts.type() == Timestamp ) ) { log() << "sync error: problem querying remote oplog record\n"; @@ -1356,7 +1362,7 @@ uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false); } if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) { - c->putBack( op ); + oplogReader.putBack( op ); _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1; dblock lk; if ( n > 0 ) { @@ -1374,11 +1380,11 @@ } } - return true; + return okResultCode; } BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}"); - + bool replAuthenticate(DBClientConnection *conn) { if( ! cc().isAdmin() ){ log() << "replauthenticate: requires admin permissions, failing\n"; @@ -1399,6 +1405,7 @@ return false; } } + } string u = user.getStringField("user"); @@ -1413,12 +1420,37 @@ return true; } - bool ReplSource::connect() { - if ( conn.get() == 0 ) { - conn = auto_ptr(new DBClientConnection()); + bool replHandshake(DBClientConnection *conn) { + + BSONObj me; + { + dblock l; + if ( ! Helpers::getSingleton( "local.me" , me ) ){ + BSONObjBuilder b; + b.appendOID( "_id" , 0 , true ); + me = b.obj(); + Helpers::putSingleton( "local.me" , me ); + } + } + + BSONObjBuilder cmd; + cmd.appendAs( me["_id"] , "handshake" ); + + BSONObj res; + bool ok = conn->runCommand( "admin" , cmd.obj() , res ); + // ignoring for now on purpose for older versions + log(ok) << "replHandshake res not: " << ok << " res: " << res << endl; + return true; + } + + bool OplogReader::connect(string hostName) { + if( conn() == 0 ) { + _conn = auto_ptr(new DBClientConnection( false, 0, replPair ? 
20 : 0 /* tcp timeout */)); string errmsg; ReplInfo r("trying to connect to sync source"); - if ( !conn->connect(hostName.c_str(), errmsg) || !replAuthenticate(conn.get()) ) { + if ( !_conn->connect(hostName.c_str(), errmsg) || + !replAuthenticate(_conn.get()) || + !replHandshake(_conn.get()) ) { resetConnection(); log() << "repl: " << errmsg << endl; return false; @@ -1428,9 +1460,10 @@ } /* note: not yet in mutex at this point. - returns true if everything happy. return false if you want to reconnect. + returns >= 0 if ok. return -1 if you want to reconnect. + return value of zero indicates no sleep necessary before next call */ - bool ReplSource::sync(int& nApplied) { + int ReplSource::sync(int& nApplied) { _sleepAdviceTime = 0; ReplInfo r("sync"); if ( !cmdLine.quiet ) { @@ -1447,24 +1480,24 @@ if ( (string("localhost") == hostName || string("127.0.0.1") == hostName) && cmdLine.port == CmdLine::DefaultDBPort ) { log() << "repl: can't sync from self (localhost). sources configuration may be wrong." << endl; sleepsecs(5); - return false; + return -1; } - if ( !connect() ) { + if ( !oplogReader.connect(hostName) ) { log(4) << "repl: can't connect to sync source" << endl; if ( replPair && paired ) { assert( startsWith(hostName.c_str(), replPair->remoteHost.c_str()) ); replPair->arbitrate(); } - return false; + return -1; } if ( paired ) { - int remote = replPair->negotiate(conn.get(), "direct"); + int remote = replPair->negotiate(oplogReader.conn(), "direct"); int nMasters = ( remote == ReplPair::State_Master ) + ( replPair->state == ReplPair::State_Master ); if ( getInitialSyncCompleted() && nMasters != 1 ) { log() << ( nMasters == 0 ? "no master" : "two masters" ) << ", deferring oplog pull" << endl; - return true; + return 1; } } @@ -1484,112 +1517,6 @@ return sync_pullOpLog(nApplied); } - /* -- Logging of operations -------------------------------------*/ - -// cached copies of these...so don't rename them - NamespaceDetails *localOplogMainDetails = 0; - Database *localOplogDB = 0; - - void replCheckCloseDatabase( Database * db ){ - localOplogDB = 0; - localOplogMainDetails = 0; - } - - /* we write to local.opload.$main: - { ts : ..., op: ..., ns: ..., o: ... } - ts: an OpTime timestamp - op: - "i" insert - "u" update - "d" delete - "c" db cmd - "db" declares presence of a database (ns is set to the db name + '.') - "n" no op - logNS - e.g. "local.oplog.$main" - bb: - if not null, specifies a boolean to pass along to the other side as b: param. - used for "justOne" or "upsert" flags on 'd', 'u' - first: true - when set, indicates this is the first thing we have logged for this database. - thus, the slave does not need to copy down all the data when it sees this. - */ - static void _logOp(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb, const OpTime &ts ) { - if ( strncmp(ns, "local.", 6) == 0 ) - return; - - DEV assertInWriteLock(); - - Client::Context context; - - /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- - instead we do a single copy to the destination position in the memory mapped file. 
- */ - - BSONObjBuilder b; - b.appendTimestamp("ts", ts.asDate()); - b.append("op", opstr); - b.append("ns", ns); - if ( bb ) - b.appendBool("b", *bb); - if ( o2 ) - b.append("o2", *o2); - BSONObj partial = b.done(); - int posz = partial.objsize(); - int len = posz + obj.objsize() + 1 + 2 /*o:*/; - - Record *r; - if ( strncmp( logNS, "local.", 6 ) == 0 ) { // For now, assume this is olog main - if ( localOplogMainDetails == 0 ) { - Client::Context ctx("local.", dbpath, 0, false); - localOplogDB = ctx.db(); - localOplogMainDetails = nsdetails(logNS); - } - Client::Context ctx( "" , localOplogDB, false ); - r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len); - } else { - Client::Context ctx( logNS, dbpath, 0, false ); - assert( nsdetails( logNS ) ); - r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len); - } - - char *p = r->data; - memcpy(p, partial.objdata(), posz); - *((unsigned *)p) += obj.objsize() + 1 + 2; - p += posz - 1; - *p++ = (char) Object; - *p++ = 'o'; - *p++ = 0; - memcpy(p, obj.objdata(), obj.objsize()); - p += obj.objsize(); - *p = EOO; - - if ( logLevel >= 6 ) { - BSONObj temp(r); - log( 6 ) << "logging op:" << temp << endl; - } - } - - static void logKeepalive() { - BSONObj obj; - _logOp("n", "", "local.oplog.$main", obj, 0, 0, OpTime::now()); - } - - void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt, bool *b) { - if ( replSettings.master ) { - _logOp(opstr, ns, "local.oplog.$main", obj, patt, b, OpTime::now()); - char cl[ 256 ]; - nsToDatabase( ns, cl ); - } - NamespaceDetailsTransient &t = NamespaceDetailsTransient::get_w( ns ); - if ( t.cllEnabled() ) { - try { - _logOp(opstr, ns, t.cllNS().c_str(), obj, patt, b, OpTime::now()); - } catch ( const DBException & ) { - t.cllInvalidate(); - } - } - } - /* --------------------------------------------------------------*/ /* @@ -1620,11 +1547,11 @@ int sleepAdvice = 1; for ( ReplSource::SourceVector::iterator i = sources.begin(); i != sources.end(); i++ ) { ReplSource *s = i->get(); - bool ok = false; + int res = -1; try { - ok = s->sync(nApplied); + res = s->sync(nApplied); bool moreToSync = s->haveMoreDbsToSync(); - if( !ok ) { + if( res < 0 ) { sleepAdvice = 3; } else if( moreToSync ) { @@ -1633,7 +1560,9 @@ else if ( s->sleepAdvice() ) { sleepAdvice = s->sleepAdvice(); } - if ( ok && !moreToSync /*&& !s->syncedTo.isNull()*/ ) { + else + sleepAdvice = res; + if ( res >= 0 && !moreToSync /*&& !s->syncedTo.isNull()*/ ) { pairSync->setInitialSyncCompletedLocking(); } } @@ -1663,8 +1592,8 @@ log() << "unexpected exception during replication. replication will halt" << endl; replAllDead = "caught unexpected exception during replication"; } - if ( !ok ) - s->resetConnection(); + if ( res < 0 ) + s->oplogReader.resetConnection(); } return sleepAdvice; } @@ -1702,6 +1631,12 @@ assert( syncing == 1 ); syncing--; } + + if( relinquishSyncingSome ) { + relinquishSyncingSome = 0; + s = 1; // sleep before going back in to syncing=1 + } + if ( s ) { stringstream ss; ss << "repl: sleep " << s << "sec before next pass"; @@ -1719,16 +1654,21 @@ static void replMasterThread() { sleepsecs(4); Client::initThread("replmaster"); + int toSleep = 10; while( 1 ) { - sleepsecs(10); + + sleepsecs( toSleep ); /* write a keep-alive like entry to the log. this will make things like printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even when things are idle. 
*/ { - writelocktry lk(""); + writelocktry lk("",1); if ( lk.got() ){ + toSleep = 10; + cc().getAuthenticationInfo()->authorize("admin"); + try { logKeepalive(); } @@ -1736,6 +1676,10 @@ log() << "caught exception in replMasterThread()" << endl; } } + else { + log(5) << "couldn't logKeepalive" << endl; + toSleep = 1; + } } } } @@ -1743,6 +1687,7 @@ void replSlaveThread() { sleepsecs(1); Client::initThread("replslave"); + cc().iAmSyncThread(); { dblock lk; @@ -1778,64 +1723,29 @@ } } - void createOplog() { - dblock lk; - - const char * ns = "local.oplog.$main"; - Client::Context ctx(ns); - - if ( nsdetails( ns ) ) { - DBDirectClient c; - BSONObj lastOp = c.findOne( ns, Query().sort( BSON( "$natural" << -1 ) ) ); - if ( !lastOp.isEmpty() ) { - OpTime::setLast( lastOp[ "ts" ].date() ); + void newRepl(); + void oldRepl(); + void startReplication() { + /* if we are going to be a replica set, we aren't doing other forms of replication. */ + if( !cmdLine._replSet.empty() ) { + if( replSettings.slave || replSettings.master || replPair ) { + log() << "***" << endl; + log() << "ERROR: can't use --slave or --master replication options with --replSet" << endl; + log() << "***" << endl; } + createOplog(); + newRepl(); return; } - - /* create an oplog collection, if it doesn't yet exist. */ - BSONObjBuilder b; - double sz; - if ( cmdLine.oplogSize != 0 ) - sz = (double)cmdLine.oplogSize; - else { - /* not specified. pick a default size */ - sz = 50.0 * 1000 * 1000; - if ( sizeof(int *) >= 8 ) { -#if defined(__APPLE__) - // typically these are desktops (dev machines), so keep it smallish - sz = (256-64) * 1000 * 1000; -#else - sz = 990.0 * 1000 * 1000; - boost::intmax_t free = freeSpace(); //-1 if call not supported. - double fivePct = free * 0.05; - if ( fivePct > sz ) - sz = fivePct; -#endif - } - } - log() << "******\n"; - log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB (use --oplogSize to change)\n"; - log() << "******" << endl; - - b.append("size", sz); - b.appendBool("capped", 1); - b.appendBool("autoIndexId", false); - - string err; - BSONObj o = b.done(); - userCreateNS(ns, o, err, false); - logOp( "n", "dummy", BSONObj() ); - } - - void startReplication() { + oldRepl(); + /* this was just to see if anything locks for longer than it should -- we need to be careful not to be locked when trying to connect() or query() the other side. 
*/ //boost::thread tempt(tempThread); - if ( !replSettings.slave && !replSettings.master && !replPair ) + if( !replSettings.slave && !replSettings.master && !replPair ) return; { @@ -1846,11 +1756,11 @@ if ( replSettings.slave || replPair ) { if ( replSettings.slave ) { - assert( replSettings.slave == SimpleSlave ); + assert( replSettings.slave == SimpleSlave ); log(1) << "slave=true" << endl; - } - else - replSettings.slave = ReplPairSlave; + } + else + replSettings.slave = ReplPairSlave; boost::thread repl_thread(replSlaveThread); } @@ -1871,56 +1781,29 @@ replPair = new ReplPair(remoteEnd, arb); } - class CmdLogCollection : public Command { - public: - virtual bool slaveOk() { - return false; - } - virtual LockType locktype(){ return WRITE; } - CmdLogCollection() : Command( "logCollection" ) {} - virtual void help( stringstream &help ) const { - help << "examples: { logCollection: , start: 1 }, " - << "{ logCollection: , validateComplete: 1 }"; - } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - string logCollection = cmdObj.getStringField( "logCollection" ); - if ( logCollection.empty() ) { - errmsg = "missing logCollection spec"; - return false; - } - bool start = !cmdObj.getField( "start" ).eoo(); - bool validateComplete = !cmdObj.getField( "validateComplete" ).eoo(); - if ( start ? validateComplete : !validateComplete ) { - errmsg = "Must specify exactly one of start:1 or validateComplete:1"; - return false; - } - int logSizeMb = cmdObj.getIntField( "logSizeMb" ); - NamespaceDetailsTransient &t = NamespaceDetailsTransient::get_w( logCollection.c_str() ); - if ( start ) { - if ( t.cllNS().empty() ) { - if ( logSizeMb == INT_MIN ) { - t.cllStart(); - } else { - t.cllStart( logSizeMb ); - } - } else { - errmsg = "Log already started for ns: " + logCollection; - return false; - } - } else { - if ( t.cllNS().empty() ) { - errmsg = "No log to validateComplete for ns: " + logCollection; - return false; - } else { - if ( !t.cllValidateComplete() ) { - errmsg = "Oplog failure, insufficient space allocated"; - return false; - } - } - } - log() << "started logCollection with cmd obj: " << cmdObj << endl; - return true; + void testPretouch() { + int nthr = min(8, 8); + nthr = max(nthr, 1); + int m = 8 / nthr; + ThreadPool tp(nthr); + vector v; + + BSONObj x = BSON( "ns" << "test.foo" << "o" << BSON( "_id" << 1 ) << "op" << "i" ); + + v.push_back(x); + v.push_back(x); + v.push_back(x); + + unsigned a = 0; + while( 1 ) { + if( a >= v.size() ) break; + unsigned b = a + m - 1; // v[a..b] + if( b >= v.size() ) b = v.size() - 1; + tp.schedule(pretouchN, v, a, b); + DEV cout << "pretouch task: " << a << ".." << b << endl; + a += m; } - } cmdlogcollection; + tp.join(); + } } // namespace mongo diff -Nru mongodb-1.4.4/db/repl.h mongodb-1.6.3/db/repl.h --- mongodb-1.4.4/db/repl.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/repl.h 2010-09-24 10:02:42.000000000 -0700 @@ -23,7 +23,6 @@ at the master: local.oplog.$ - local.oplog.$main is the default */ #pragma once @@ -33,16 +32,14 @@ #include "dbhelpers.h" #include "query.h" #include "queryoptimizer.h" - #include "../client/dbclient.h" - #include "../util/optime.h" +#include "oplog.h" +#include "../util/concurrency/thread_pool.h" +#include "oplogreader.h" namespace mongo { - class DBClientConnection; - class DBClientCursor; - /* replication slave? 
(possibly with slave or repl pair nonmaster) --slave cmd line setting -> SimpleSlave */ @@ -79,10 +76,9 @@ /* A replication exception */ class SyncException : public DBException { public: - virtual const char* what() const throw() { return "sync exception"; } - virtual int getCode(){ return 10001; } + SyncException() : DBException( "sync exception" , 10001 ){} }; - + /* A Source is a source from which we can pull (replicate) data. stored in collection local.sources. @@ -94,16 +90,15 @@ not done (always use main for now). */ class ReplSource { + auto_ptr tp; + bool resync(string db); /* pull some operations from the master's oplog, and apply them. */ - bool sync_pullOpLog(int& nApplied); + int sync_pullOpLog(int& nApplied); void sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail); - auto_ptr conn; - auto_ptr cursor; - /* we only clone one database per pass, even if a lot need done. This helps us avoid overflowing the master's transaction log by doing too much work before going back to read more transactions. (Imagine a scenario of slave startup where we try to @@ -117,8 +112,6 @@ // returns the dummy ns used to do the drop string resyncDrop( const char *db, const char *requester ); - // returns true if connected on return - bool connect(); // returns possibly unowned id spec for the operation. static BSONObj idForOp( const BSONObj &op, bool &mod ); static void updateSetsWithOp( const BSONObj &op, bool mayUpdateStorage ); @@ -136,6 +129,8 @@ unsigned _sleepAdviceTime; public: + OplogReader oplogReader; + static void applyOperation(const BSONObj& op); bool replacing; // in "replace mode" -- see CmdReplacePeer bool paired; // --pair in use @@ -162,12 +157,11 @@ typedef vector< shared_ptr< ReplSource > > SourceVector; static void loadAll(SourceVector&); explicit ReplSource(BSONObj); - bool sync(int& nApplied); + + /* -1 = error */ + int sync(int& nApplied); + void save(); // write ourself to local.sources - void resetConnection() { - cursor = auto_ptr(0); - conn = auto_ptr(0); - } // make a jsobj from our member fields of the form // { host: ..., source: ..., syncedTo: ... 
} @@ -176,7 +170,7 @@ bool operator==(const ReplSource&r) const { return hostName == r.hostName && sourceName() == r.sourceName(); } - operator string() const { return sourceName() + "@" + hostName; } + string toString() const { return sourceName() + "@" + hostName; } bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); } int sleepAdvice() const { @@ -191,15 +185,6 @@ void forceResync( const char *requester ); }; - /* Write operation to the log (local.oplog.$main) - "i" insert - "u" update - "d" delete - "c" db cmd - "db" declares presence of a database (ns is set to the db name + '.') - */ - void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt = 0, bool *b = 0); - // class for managing a set of ids in memory class MemIds { public: @@ -342,151 +327,5 @@ bool anyReplEnabled(); void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level = 0 ); - void replCheckCloseDatabase( Database * db ); - extern int __findingStartInitialTimeout; // configurable for testing - - class FindingStartCursor { - public: - FindingStartCursor( const QueryPlan & qp ) : - _qp( qp ), - _findingStart( true ), - _findingStartMode(), - _findingStartTimer( 0 ), - _findingStartCursor( 0 ) - { init(); } - bool done() const { return !_findingStart; } - auto_ptr< Cursor > cRelease() { return _c; } - void next() { - if ( !_findingStartCursor || !_findingStartCursor->c->ok() ) { - _findingStart = false; - _c = _qp.newCursor(); // on error, start from beginning - destroyClientCursor(); - return; - } - switch( _findingStartMode ) { - case Initial: { - if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStart = false; // found first record out of query range, so scan normally - _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); - destroyClientCursor(); - return; - } - _findingStartCursor->c->advance(); - RARELY { - if ( _findingStartTimer.seconds() >= __findingStartInitialTimeout ) { - createClientCursor( startLoc( _findingStartCursor->c->currLoc() ) ); - _findingStartMode = FindExtent; - return; - } - } - maybeRelease(); - return; - } - case FindExtent: { - if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStartMode = InExtent; - return; - } - DiskLoc prev = prevLoc( _findingStartCursor->c->currLoc() ); - if ( prev.isNull() ) { // hit beginning, so start scanning from here - createClientCursor(); - _findingStartMode = InExtent; - return; - } - // There might be a more efficient implementation than creating new cursor & client cursor each time, - // not worrying about that for now - createClientCursor( prev ); - maybeRelease(); - return; - } - case InExtent: { - if ( _matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStart = false; // found first record in query range, so scan normally - _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); - destroyClientCursor(); - return; - } - _findingStartCursor->c->advance(); - maybeRelease(); - return; - } - default: { - massert( 12600, "invalid _findingStartMode", false ); - } - } - } - private: - enum FindingStartMode { Initial, FindExtent, InExtent }; - const QueryPlan &_qp; - bool _findingStart; - FindingStartMode _findingStartMode; - auto_ptr< CoveredIndexMatcher > _matcher; - Timer _findingStartTimer; - ClientCursor * _findingStartCursor; - auto_ptr< Cursor > _c; - DiskLoc startLoc( const DiskLoc &rec ) { - Extent *e = rec.rec()->myExtent( rec ); - if 
( !_qp.nsd()->capLooped() || ( e->myLoc != _qp.nsd()->capExtent ) ) - return e->firstRecord; - // Likely we are on the fresh side of capExtent, so return first fresh record. - // If we are on the stale side of capExtent, then the collection is small and it - // doesn't matter if we start the extent scan with capFirstNewRecord. - return _qp.nsd()->capFirstNewRecord; - } - - // should never have an empty extent in the oplog, so don't worry about that case - DiskLoc prevLoc( const DiskLoc &rec ) { - Extent *e = rec.rec()->myExtent( rec ); - if ( _qp.nsd()->capLooped() ) { - if ( e->xprev.isNull() ) - e = _qp.nsd()->lastExtent.ext(); - else - e = e->xprev.ext(); - if ( e->myLoc != _qp.nsd()->capExtent ) - return e->firstRecord; - } else { - if ( !e->xprev.isNull() ) { - e = e->xprev.ext(); - return e->firstRecord; - } - } - return DiskLoc(); // reached beginning of collection - } - void createClientCursor( const DiskLoc &startLoc = DiskLoc() ) { - auto_ptr c = _qp.newCursor( startLoc ); - _findingStartCursor = new ClientCursor(c, _qp.ns(), false); - } - void destroyClientCursor() { - if ( _findingStartCursor ) { - ClientCursor::erase( _findingStartCursor->cursorid ); - _findingStartCursor = 0; - } - } - void maybeRelease() { - RARELY { - CursorId id = _findingStartCursor->cursorid; - _findingStartCursor->updateLocation(); - { - dbtemprelease t; - } - _findingStartCursor = ClientCursor::find( id, false ); - } - } - void init() { - // Use a ClientCursor here so we can release db mutex while scanning - // oplog (can take quite a while with large oplogs). - auto_ptr c = _qp.newReverseCursor(); - _findingStartCursor = new ClientCursor(c, _qp.ns(), false); - _findingStartTimer.reset(); - _findingStartMode = Initial; - BSONElement tsElt = _qp.query()[ "ts" ]; - massert( 13044, "no ts field in query", !tsElt.eoo() ); - BSONObjBuilder b; - b.append( tsElt ); - BSONObj tsQuery = b.obj(); - _matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey())); - } - }; - } // namespace mongo diff -Nru mongodb-1.4.4/db/replpair.h mongodb-1.6.3/db/replpair.h --- mongodb-1.4.4/db/replpair.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/replpair.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,236 @@ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "db.h" +#include "dbhelpers.h" +#include "json.h" +#include "../client/dbclient.h" +#include "repl.h" +#include "cmdline.h" +#include "repl/rs.h" + +namespace mongo { + + extern const char *replAllDead; + + /* ReplPair is a pair of db servers replicating to one another and cooperating. + + Only one member of the pair is active at a time; so this is a smart master/slave + configuration basically. + + You may read from the slave at anytime though (if you don't mind the slight lag). + + todo: Could be extended to be more than a pair, thus the name 'Set' -- for example, + a set of 3... 
+ */ + + class ReplPair { + public: + enum ReplState { + State_CantArb = -3, + State_Confused = -2, + State_Negotiating = -1, + State_Slave = 0, + State_Master = 1 + }; + + int state; + ThreadSafeString info; // commentary about our current state + string arbHost; // "-" for no arbiter. "host[:port]" + int remotePort; + string remoteHost; + string remote; // host:port if port specified. + // int date; // -1 not yet set; 0=slave; 1=master + + string getInfo() { + stringstream ss; + ss << " state: "; + if ( state == 1 ) ss << "1 State_Master "; + else if ( state == 0 ) ss << "0 State_Slave"; + else + ss << "" << state << ""; + ss << '\n'; + ss << " info: " << info << '\n'; + ss << " arbhost: " << arbHost << '\n'; + ss << " remote: " << remoteHost << ':' << remotePort << '\n'; +// ss << " date: " << date << '\n'; + return ss.str(); + } + + ReplPair(const char *remoteEnd, const char *arbiter); + virtual ~ReplPair() {} + + bool dominant(const string& myname) { + if ( myname == remoteHost ) + return cmdLine.port > remotePort; + return myname > remoteHost; + } + + void setMasterLocked( int n, const char *_comment = "" ) { + dblock p; + setMaster( n, _comment ); + } + + void setMaster(int n, const char *_comment = ""); + + /* negotiate with our peer who is master; returns state of peer */ + int negotiate(DBClientConnection *conn, string method); + + /* peer unreachable, try our arbitrator */ + void arbitrate(); + + virtual + DBClientConnection *newClientConnection() const { + return new DBClientConnection(); + } + }; + + extern ReplPair *replPair; + + /* note we always return true for the "local" namespace. + + we should not allow most operations when not the master + also we report not master if we are "dead". + + See also CmdIsMaster. + + If 'client' is not specified, the current client is used. + */ + inline bool _isMaster() { + if( replSet ) { + if( theReplSet ) + return theReplSet->isPrimary(); + return false; + } + + if( ! replSettings.slave ) + return true; + + if ( replAllDead ) + return false; + + if ( replPair ) { + if( replPair->state == ReplPair::State_Master ) + return true; + } + else { + if( replSettings.master ) { + // if running with --master --slave, allow. note that master is also true + // for repl pairs so the check for replPair above is important. + return true; + } + } + + if ( cc().isGod() ) + return true; + + return false; + } + inline bool isMaster(const char *client = 0) { + if( _isMaster() ) + return true; + if ( !client ) { + Database *database = cc().database(); + assert( database ); + client = database->name.c_str(); + } + return strcmp( client, "local" ) == 0; + } + + inline void notMasterUnless(bool expr) { + uassert( 10107 , "not master" , expr ); + } + + /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair + so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to + query the nonmaster member of a replica pair. + */ + inline void replVerifyReadsOk(ParsedQuery& pq) { + if( replSet ) { + /* todo: speed up the secondary case. as written here there are 2 mutex entries, it can be 1. 
*/ + if( isMaster() ) return; + notMasterUnless( pq.hasOption(QueryOption_SlaveOk) && theReplSet && theReplSet->isSecondary() ); + } else { + notMasterUnless(isMaster() || pq.hasOption(QueryOption_SlaveOk) || replSettings.slave == SimpleSlave ); + } + } + + inline bool isMasterNs( const char *ns ) { + char cl[ 256 ]; + nsToDatabase( ns, cl ); + return isMaster( cl ); + } + + inline ReplPair::ReplPair(const char *remoteEnd, const char *arb) { + state = -1; + remote = remoteEnd; + remotePort = CmdLine::DefaultDBPort; + remoteHost = remoteEnd; + const char *p = strchr(remoteEnd, ':'); + if ( p ) { + remoteHost = string(remoteEnd, p-remoteEnd); + remotePort = atoi(p+1); + uassert( 10125 , "bad port #", remotePort > 0 && remotePort < 0x10000 ); + if ( remotePort == CmdLine::DefaultDBPort ) + remote = remoteHost; // don't include ":27017" as it is default; in case ran in diff ways over time to normalizke the hostname format in sources collection + } + + uassert( 10126 , "arbiter parm is missing, use '-' for none", arb); + arbHost = arb; + uassert( 10127 , "arbiter parm is empty", !arbHost.empty()); + } + + /* This is set to true if we have EVER been up to date -- this way a new pair member + which is a replacement won't go online as master until we have initially fully synced. + */ + class PairSync { + int initialsynccomplete; + public: + PairSync() { + initialsynccomplete = -1; + } + + /* call before using the class. from dbmutex */ + void init() { + BSONObj o; + initialsynccomplete = 0; + if ( Helpers::getSingleton("local.pair.sync", o) ) + initialsynccomplete = 1; + } + + bool initialSyncCompleted() { + return initialsynccomplete != 0; + } + + void setInitialSyncCompleted() { + BSONObj o = fromjson("{\"initialsynccomplete\":1}"); + Helpers::putSingleton("local.pair.sync", o); + initialsynccomplete = 1; + tlog() << "pair: initial sync complete" << endl; + } + + void setInitialSyncCompletedLocking() { + if ( initialsynccomplete == 1 ) + return; + dblock lk; + setInitialSyncCompleted(); + } + }; + + +} // namespace mongo diff -Nru mongodb-1.4.4/db/replset.h mongodb-1.6.3/db/replset.h --- mongodb-1.4.4/db/replset.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/replset.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,207 +0,0 @@ -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see . -*/ - -#pragma once - -#include "db.h" -#include "dbhelpers.h" -#include "json.h" -#include "../client/dbclient.h" -#include "repl.h" -#include "cmdline.h" - -namespace mongo { - - extern const char *replAllDead; - - /* ReplPair is a pair of db servers replicating to one another and cooperating. - - Only one member of the pair is active at a time; so this is a smart master/slave - configuration basically. - - You may read from the slave at anytime though (if you don't mind the slight lag). - - todo: Could be extended to be more than a pair, thus the name 'Set' -- for example, - a set of 3... 
- */ - - class ReplPair { - public: - enum ReplState { - State_CantArb = -3, - State_Confused = -2, - State_Negotiating = -1, - State_Slave = 0, - State_Master = 1 - }; - - int state; - ThreadSafeString info; // commentary about our current state - string arbHost; // "-" for no arbiter. "host[:port]" - int remotePort; - string remoteHost; - string remote; // host:port if port specified. -// int date; // -1 not yet set; 0=slave; 1=master - - string getInfo() { - stringstream ss; - ss << " state: "; - if ( state == 1 ) ss << "1 State_Master "; - else if ( state == 0 ) ss << "0 State_Slave"; - else - ss << "" << state << ""; - ss << '\n'; - ss << " info: " << info << '\n'; - ss << " arbhost: " << arbHost << '\n'; - ss << " remote: " << remoteHost << ':' << remotePort << '\n'; -// ss << " date: " << date << '\n'; - return ss.str(); - } - - ReplPair(const char *remoteEnd, const char *arbiter); - virtual ~ReplPair() {} - - bool dominant(const string& myname) { - if ( myname == remoteHost ) - return cmdLine.port > remotePort; - return myname > remoteHost; - } - - void setMasterLocked( int n, const char *_comment = "" ) { - dblock p; - setMaster( n, _comment ); - } - - void setMaster(int n, const char *_comment = ""); - - /* negotiate with our peer who is master; returns state of peer */ - int negotiate(DBClientConnection *conn, string method); - - /* peer unreachable, try our arbitrator */ - void arbitrate(); - - virtual - DBClientConnection *newClientConnection() const { - return new DBClientConnection(); - } - }; - - extern ReplPair *replPair; - - /* note we always return true for the "local" namespace. - - we should not allow most operations when not the master - also we report not master if we are "dead". - - See also CmdIsMaster. - - If 'client' is not specified, the current client is used. - */ - inline bool isMaster( const char *client = 0 ) { - if( ! replSettings.slave ) - return true; - - if ( !client ) { - Database *database = cc().database(); - assert( database ); - client = database->name.c_str(); - } - - if ( replAllDead ) - return strcmp( client, "local" ) == 0; - - if ( replPair ) { - if( replPair->state == ReplPair::State_Master ) - return true; - } - else { - if( replSettings.master ) { - // if running with --master --slave, allow. note that master is also true - // for repl pairs so the check for replPair above is important. - return true; - } - } - - if ( cc().isGod() ) - return true; - - return strcmp( client, "local" ) == 0; - } - inline bool isMasterNs( const char *ns ) { - char cl[ 256 ]; - nsToDatabase( ns, cl ); - return isMaster( cl ); - } - - inline ReplPair::ReplPair(const char *remoteEnd, const char *arb) { - state = -1; - remote = remoteEnd; - remotePort = CmdLine::DefaultDBPort; - remoteHost = remoteEnd; - const char *p = strchr(remoteEnd, ':'); - if ( p ) { - remoteHost = string(remoteEnd, p-remoteEnd); - remotePort = atoi(p+1); - uassert( 10125 , "bad port #", remotePort > 0 && remotePort < 0x10000 ); - if ( remotePort == CmdLine::DefaultDBPort ) - remote = remoteHost; // don't include ":27017" as it is default; in case ran in diff ways over time to normalizke the hostname format in sources collection - } - - uassert( 10126 , "arbiter parm is missing, use '-' for none", arb); - arbHost = arb; - uassert( 10127 , "arbiter parm is empty", !arbHost.empty()); - } - - /* This is set to true if we have EVER been up to date -- this way a new pair member - which is a replacement won't go online as master until we have initially fully synced. 
- */ - class PairSync { - int initialsynccomplete; - public: - PairSync() { - initialsynccomplete = -1; - } - - /* call before using the class. from dbmutex */ - void init() { - BSONObj o; - initialsynccomplete = 0; - if ( Helpers::getSingleton("local.pair.sync", o) ) - initialsynccomplete = 1; - } - - bool initialSyncCompleted() { - return initialsynccomplete != 0; - } - - void setInitialSyncCompleted() { - BSONObj o = fromjson("{\"initialsynccomplete\":1}"); - Helpers::putSingleton("local.pair.sync", o); - initialsynccomplete = 1; - } - - void setInitialSyncCompletedLocking() { - if ( initialsynccomplete == 1 ) - return; - dblock lk; - BSONObj o = fromjson("{\"initialsynccomplete\":1}"); - Helpers::putSingleton("local.pair.sync", o); - initialsynccomplete = 1; - } - }; - - -} // namespace mongo diff -Nru mongodb-1.4.4/db/resource.h mongodb-1.6.3/db/resource.h --- mongodb-1.4.4/db/resource.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/resource.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,34 +1,16 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by db.rc - -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see . -*/ - -namespace mongo { - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 101 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif - -} // namespace mongo +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by db.rc +// +#define IDI_ICON2 102 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 104 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1001 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff -Nru mongodb-1.4.4/db/restapi.cpp mongodb-1.6.3/db/restapi.cpp --- mongodb-1.4.4/db/restapi.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/restapi.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,310 @@ +/** @file resetapi.cpp + web rest api +*/ +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#include "pch.h" +#include "../util/miniwebserver.h" +#include "../util/mongoutils/html.h" +#include "../util/md5.hpp" +#include "instance.h" +#include "dbwebserver.h" +#include "dbhelpers.h" +#include "repl.h" +#include "replpair.h" +#include "clientcursor.h" +#include "background.h" + +namespace mongo { + + extern const char *replInfo; + bool getInitialSyncCompleted(); + + using namespace bson; + using namespace mongoutils::html; + + class RESTHandler : public DbWebHandler { + public: + RESTHandler() : DbWebHandler( "DUMMY REST" , 1000 , true ){} + + virtual bool handles( const string& url ) const { + return + url[0] == '/' && + url.find_last_of( '/' ) > 0; + } + + virtual void handle( const char *rq, string url, + string& responseMsg, int& responseCode, + vector& headers, const SockAddr &from ){ + + string::size_type first = url.find( "/" , 1 ); + if ( first == string::npos ) { + responseCode = 400; + return; + } + + string method = MiniWebServer::parseMethod( rq ); + string dbname = url.substr( 1 , first - 1 ); + string coll = url.substr( first + 1 ); + string action = ""; + + BSONObj params; + if ( coll.find( "?" ) != string::npos ) { + MiniWebServer::parseParams( params , coll.substr( coll.find( "?" ) + 1 ) ); + coll = coll.substr( 0 , coll.find( "?" ) ); + } + + string::size_type last = coll.find_last_of( "/" ); + if ( last == string::npos ) { + action = coll; + coll = "_defaultCollection"; + } + else { + action = coll.substr( last + 1 ); + coll = coll.substr( 0 , last ); + } + + for ( string::size_type i=0; i cursor = db.query( ns.c_str() , query, num , skip ); + uassert( 13085 , "query failed for dbwebserver" , cursor.get() ); + + if ( one ) { + if ( cursor->more() ) { + BSONObj obj = cursor->next(); + out << obj.jsonString(Strict,html?1:0) << '\n'; + } + else { + responseCode = 404; + } + return html != 0; + } + + if( html ) { + string title = string("query ") + ns; + out << start(title) + << p(title) + << "

";
+            } else {
+                out << "{\n";
+                out << "  \"offset\" : " << skip << ",\n";
+                out << "  \"rows\": [\n";
+            }
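+            // (the json branch opens a CouchDB-style envelope here; the
+            //  matching "total_rows", "query" and "millis" fields are
+            //  appended after the row loop below)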
+
+            int howMany = 0;
+            while ( cursor->more() ) {
+                if ( howMany++ && html == 0 )
+                    out << " ,\n";
+                BSONObj obj = cursor->next();
+                if( html ) {
+                    if( out.tellp() > 4 * 1024 * 1024 ) {
+                        out << "Stopping output: more than 4MB returned and in html mode\n";
+                        break;
+                    }
+                    out << obj.jsonString(Strict, html?1:0) << "\n\n";
+                }
+                else {
+                    if( out.tellp() > 50 * 1024 * 1024 ) // 50MB limit - we are using ram
+                        break;
+                    out << "    " << obj.jsonString();
+                }
+            }
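+            // (both caps above bound the response buffered in ram before it
+            //  is sent: pretty-printed html at 4MB, plain json at 50MB)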
+
+            if( html ) { 
+                out << "
\n"; + if( howMany == 0 ) out << p("Collection is empty"); + out << _end(); + } + else { + out << "\n ],\n\n"; + out << " \"total_rows\" : " << howMany << " ,\n"; + out << " \"query\" : " << query.jsonString() << " ,\n"; + out << " \"millis\" : " << t.millis() << '\n'; + out << "}\n"; + } + + return html != 0; + } + + // TODO Generate id and revision per couch POST spec + void handlePost( string ns, const char *body, BSONObj& params, int & responseCode, stringstream & out ) { + try { + BSONObj obj = fromjson( body ); + db.insert( ns.c_str(), obj ); + } catch ( ... ) { + responseCode = 400; // Bad Request. Seems reasonable for now. + out << "{ \"ok\" : false }"; + return; + } + + responseCode = 201; + out << "{ \"ok\" : true }"; + } + + int _getOption( BSONElement e , int def ) { + if ( e.isNumber() ) + return e.numberInt(); + if ( e.type() == String ) + return atoi( e.valuestr() ); + return def; + } + + DBDirectClient db; + + } restHandler; + + bool webHaveAdminUsers(){ + readlocktryassert rl("admin.system.users", 10000); + Client::Context cx( "admin.system.users" ); + return ! Helpers::isEmpty("admin.system.users"); + } + + BSONObj webGetAdminUser( const string& username ){ + Client::GodScope gs; + readlocktryassert rl("admin.system.users", 10000); + Client::Context cx( "admin.system.users" ); + BSONObj user; + if ( Helpers::findOne( "admin.system.users" , BSON( "user" << username ) , user ) ) + return user.copy(); + return BSONObj(); + } + + class LowLevelMongodStatus : public WebStatusPlugin { + public: + LowLevelMongodStatus() : WebStatusPlugin( "low level" , 5 , "requires read lock" ){} + + virtual void init(){} + + void _gotLock( int millis , stringstream& ss ){ + ss << "
\n";
+            ss << "time to get readlock: " << millis << "ms\n";
+            
+            ss << "# databases: " << dbHolder.size() << '\n';
+            
+            if( ClientCursor::numCursors()>500 )
+                ss << "# Cursors: " << ClientCursor::numCursors() << '\n';
+            
+            ss << "\nreplication: ";
+            if( *replInfo )
+                ss << "\nreplInfo:  " << replInfo << "\n\n";
+            if( replSet ) {
+                ss << a("", "see replSetGetStatus link top of page") << "--replSet " << cmdLine._replSet << '\n';
+            }
+            if ( replAllDead )
+                ss << "replication replAllDead=" << replAllDead << "\n";
+
+            else {
+                ss << "\nmaster: " << replSettings.master << '\n';
+                ss << "slave:  " << replSettings.slave << '\n';
+                if ( replPair ) {
+                    ss << "replpair:\n";
+                    ss << replPair->getInfo();
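+                    // (getInfo() renders state/info/arbhost/remote; see
+                    //  ReplPair::getInfo() in replpair.h)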
+                }
+                bool seemCaughtUp = getInitialSyncCompleted();
+                if ( !seemCaughtUp ) ss << "";
+                ss <<   "initialSyncCompleted: " << seemCaughtUp;
+                if ( !seemCaughtUp ) ss << "";
+                ss << '\n';
+            }
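+                // (seemCaughtUp reflects PairSync: a replacement pair member
+                //  reports false until its first full initial sync completes)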
+
+            BackgroundOperation::dump(ss);
+            ss << "
\n"; + } + + virtual void run( stringstream& ss ){ + Timer t; + readlocktry lk( "" , 300 ); + if ( lk.got() ){ + _gotLock( t.millis() , ss ); + } + else { + ss << "\ntimed out getting lock\n"; + } + } + } lowLevelMongodStatus; +} diff -Nru mongodb-1.4.4/db/scanandorder.h mongodb-1.6.3/db/scanandorder.h --- mongodb-1.4.4/db/scanandorder.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/scanandorder.h 2010-09-24 10:02:42.000000000 -0700 @@ -50,25 +50,31 @@ _ response size limit from runquery; push it up a bit. */ - inline void fillQueryResultFromObj(BufBuilder& bb, FieldMatcher *filter, BSONObj& js) { + inline void fillQueryResultFromObj(BufBuilder& bb, FieldMatcher *filter, BSONObj& js, DiskLoc* loc=NULL) { if ( filter ) { BSONObjBuilder b( bb ); BSONObjIterator i( js ); - bool gotId = false; while ( i.more() ){ BSONElement e = i.next(); const char * fname = e.fieldName(); if ( strcmp( fname , "_id" ) == 0 ){ - b.append( e ); - gotId = true; + if (filter->includeID()) + b.append( e ); } else { filter->append( b , e ); } } + if (loc) + b.append("$diskLoc", loc->toBSONObj()); + b.done(); + } else if (loc) { + BSONObjBuilder b( bb ); + b.appendElements(js); + b.append("$diskLoc", loc->toBSONObj()); b.done(); } else { - bb.append((void*) js.objdata(), js.objsize()); + bb.appendBuf((void*) js.objdata(), js.objsize()); } } @@ -80,17 +86,25 @@ KeyType order; unsigned approxSize; - void _add(BSONObj& k, BSONObj o) { - best.insert(make_pair(k,o)); + void _add(BSONObj& k, BSONObj o, DiskLoc* loc) { + if (!loc){ + best.insert(make_pair(k.getOwned(),o.getOwned())); + } else { + BSONObjBuilder b; + b.appendElements(o); + b.append("$diskLoc", loc->toBSONObj()); + best.insert(make_pair(k.getOwned(), b.obj().getOwned())); + } } - void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i) { + void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) { + /* todo : we don't correct approxSize here. */ const BSONObj& worstBestKey = i->first; int c = worstBestKey.woCompare(k, order.pattern); if ( c > 0 ) { // k is better, 'upgrade' best.erase(i); - _add(k, o); + _add(k, o, loc); } } @@ -106,19 +120,24 @@ return best.size(); } - void add(BSONObj o) { + void add(BSONObj o, DiskLoc* loc) { + assert( o.isValid() ); BSONObj k = order.getKeyFromObject(o); if ( (int) best.size() < limit ) { approxSize += k.objsize(); - uassert( 10128 , "too much key data for sort() with no index. add an index or specify a smaller limit", approxSize < 1 * 1024 * 1024 ); - _add(k, o); + approxSize += o.objsize(); + + /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */ + uassert( 10128 , "too much data for sort() with no index. 
add an index or specify a smaller limit", approxSize < 32 * 1024 * 1024 ); + + _add(k, o, loc); return; } BestMap::iterator i; assert( best.end() != best.begin() ); i = best.end(); i--; - _addIfBetter(k, o, i); + _addIfBetter(k, o, i, loc); } void _fill(BufBuilder& b, FieldMatcher *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) { diff -Nru mongodb-1.4.4/db/security_commands.cpp mongodb-1.6.3/db/security_commands.cpp --- mongodb-1.4.4/db/security_commands.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/security_commands.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ // security.cpp -#include "stdafx.h" +#include "pch.h" #include "security.h" #include "../util/md5.hpp" #include "json.h" @@ -49,15 +49,14 @@ class CmdGetNonce : public Command { public: virtual bool requiresAuth() { return false; } - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() { + virtual bool logTheOp() { return false; } + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return NONE; } + void help(stringstream& h) const { h << "internal"; } + virtual LockType locktype() const { return NONE; } CmdGetNonce() : Command("getnonce") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { nonce *n = new nonce(security.getNonce()); stringstream ss; ss << hex << *n; @@ -72,15 +71,15 @@ virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return NONE; } + void help(stringstream& h) const { h << "de-authenticate"; } + virtual LockType locktype() const { return NONE; } CmdLogout() : Command("logout") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - // database->name is the one we are logging out... 
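For orientation: across these command hunks the 1.6 Command interface const-qualifies slaveOk() and locktype(), adds help(), and hands run() the database name instead of a raw namespace string. A hypothetical minimal subclass (CmdExample does not exist in the source) showing the new signatures:

    class CmdExample : public Command {
    public:
        CmdExample() : Command("example") {}
        virtual bool slaveOk() const { return true; }        // now const
        virtual LockType locktype() const { return NONE; }   // now const
        virtual void help( stringstream& h ) const { h << "demo"; }
        bool run(const string& dbname, BSONObj& cmdObj, string& errmsg,
                 BSONObjBuilder& result, bool fromRepl) {    // dbname, not ns
            result.append( "dbname" , dbname );
            return true;
        }
    };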
+ bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { AuthenticationInfo *ai = cc().getAuthenticationInfo(); - ai->logout(nsToDatabase(ns)); + ai->logout(dbname); return true; } } cmdLogout; @@ -91,12 +90,13 @@ virtual bool logTheOp() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } - virtual LockType locktype(){ return WRITE; } // TODO: make this READ + virtual LockType locktype() const { return WRITE; } // TODO: make this READ + virtual void help(stringstream& ss) const { ss << "internal"; } CmdAuthenticate() : Command("authenticate") {} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log(1) << " authenticate: " << cmdObj << endl; string user = cmdObj.getStringField("user"); @@ -105,7 +105,7 @@ if( user.empty() || key.empty() || received_nonce.empty() ) { log() << "field missing/wrong type in received authenticate command " - << cc().database()->name + << dbname << endl; errmsg = "auth fails"; sleepmillis(10); @@ -119,9 +119,11 @@ nonce *ln = lastNonce.release(); if ( ln == 0 ) { reject = true; + log(1) << "auth: no lastNonce" << endl; } else { digestBuilder << hex << *ln; reject = digestBuilder.str() != received_nonce; + if ( reject ) log(1) << "auth: different lastNonce" << endl; } if ( reject ) { @@ -133,7 +135,7 @@ } static BSONObj userPattern = fromjson("{\"user\":1}"); - string systemUsers = cc().database()->name + ".system.users"; + string systemUsers = dbname + ".system.users"; OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); BSONObj userObj; @@ -164,7 +166,7 @@ string computed = digestToString( d ); if ( key != computed ){ - log() << "auth: key mismatch " << user << ", ns:" << ns << endl; + log() << "auth: key mismatch " << user << ", ns:" << dbname << endl; errmsg = "auth fails"; return false; } @@ -172,13 +174,7 @@ AuthenticationInfo *ai = cc().getAuthenticationInfo(); if ( userObj[ "readOnly" ].isBoolean() && userObj[ "readOnly" ].boolean() ) { - if ( readLockSupported() ){ - ai->authorizeReadOnly( cc().database()->name.c_str() ); - } - else { - log() << "warning: old version of boost, read-only users not supported" << endl; - ai->authorize( cc().database()->name.c_str() ); - } + ai->authorizeReadOnly( cc().database()->name.c_str() ); } else { ai->authorize( cc().database()->name.c_str() ); } diff -Nru mongodb-1.4.4/db/security.cpp mongodb-1.6.3/db/security.cpp --- mongodb-1.4.4/db/security.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/security.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "security.h" #include "instance.h" #include "client.h" @@ -31,9 +31,9 @@ int AuthenticationInfo::warned = 0; void AuthenticationInfo::print(){ - cout << "AuthenticationInfo: " << this << "\n"; + cout << "AuthenticationInfo: " << this << '\n'; for ( map::iterator i=m.begin(); i!=m.end(); i++ ){ - cout << "\t" << i->first << "\t" << i->second.level << "\n"; + cout << "\t" << i->first << "\t" << i->second.level << '\n'; } cout << "END" << endl; } diff -Nru mongodb-1.4.4/db/security.h mongodb-1.6.3/db/security.h --- mongodb-1.4.4/db/security.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/security.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,10 +18,6 @@ #pragma once -#include -#undef assert -#define assert xassert - #include "nonce.h" #include "concurrency.h" @@ -42,7 +38,7 @@ static int warned; public: bool isLocalHost; - AuthenticationInfo() { isLocalHost = false; } + AuthenticationInfo() : _lock("AuthenticationInfo") { isLocalHost = false; } ~AuthenticationInfo() { } void logout(const string& dbname ) { diff -Nru mongodb-1.4.4/db/stats/counters.cpp mongodb-1.6.3/db/stats/counters.cpp --- mongodb-1.4.4/db/stats/counters.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/counters.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "../jsobj.h" #include "counters.h" @@ -34,17 +34,17 @@ b.append( "command" , zero ); _obj = b.obj(); - _insert = (int*)_obj["insert"].value(); - _query = (int*)_obj["query"].value(); - _update = (int*)_obj["update"].value(); - _delete = (int*)_obj["delete"].value(); - _getmore = (int*)_obj["getmore"].value(); - _command = (int*)_obj["command"].value(); + _insert = (AtomicUInt*)_obj["insert"].value(); + _query = (AtomicUInt*)_obj["query"].value(); + _update = (AtomicUInt*)_obj["update"].value(); + _delete = (AtomicUInt*)_obj["delete"].value(); + _getmore = (AtomicUInt*)_obj["getmore"].value(); + _command = (AtomicUInt*)_obj["command"].value(); } void OpCounters::gotOp( int op , bool isCommand ){ switch ( op ){ - case dbInsert: gotInsert(); break; + case dbInsert: /*gotInsert();*/ break; // need to handle multi-insert case dbQuery: if ( isCommand ) gotCommand(); @@ -123,6 +123,24 @@ b.appendNumber( "last_ms" , _last_time ); b.append("last_finished", _last); } + + + void GenericCounter::hit( const string& name , int count ){ + scoped_lock lk( _mutex ); + _counts[name]++; + } + + BSONObj GenericCounter::getObj() { + BSONObjBuilder b(128); + { + mongo::mutex::scoped_lock lk( _mutex ); + for ( map::iterator i=_counts.begin(); i!=_counts.end(); i++ ){ + b.appendNumber( i->first , i->second ); + } + } + return b.obj(); + } + OpCounters globalOpCounters; diff -Nru mongodb-1.4.4/db/stats/counters.h mongodb-1.6.3/db/stats/counters.h --- mongodb-1.4.4/db/stats/counters.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/counters.h 2010-09-24 10:02:42.000000000 -0700 @@ -15,8 +15,9 @@ * along with this program. If not, see . 
*/ +#pragma once -#include "../../stdafx.h" +#include "../../pch.h" #include "../jsobj.h" #include "../../util/message.h" #include "../../util/processinfo.h" @@ -32,12 +33,12 @@ OpCounters(); - int * getInsert(){ return _insert; } - int * getQuery(){ return _query; } - int * getUpdate(){ return _update; } - int * getDelete(){ return _delete; } - int * getGetMore(){ return _getmore; } - int * getCommand(){ return _command; } + AtomicUInt * getInsert(){ return _insert; } + AtomicUInt * getQuery(){ return _query; } + AtomicUInt * getUpdate(){ return _update; } + AtomicUInt * getDelete(){ return _delete; } + AtomicUInt * getGetMore(){ return _getmore; } + AtomicUInt * getCommand(){ return _command; } void gotInsert(){ _insert[0]++; } void gotQuery(){ _query[0]++; } @@ -51,12 +52,12 @@ BSONObj& getObj(){ return _obj; } private: BSONObj _obj; - int * _insert; - int * _query; - int * _update; - int * _delete; - int * _getmore; - int * _command; + AtomicUInt * _insert; + AtomicUInt * _query; + AtomicUInt * _update; + AtomicUInt * _delete; + AtomicUInt * _getmore; + AtomicUInt * _command; }; extern OpCounters globalOpCounters; @@ -118,4 +119,15 @@ }; extern FlushCounters globalFlushCounters; + + + class GenericCounter { + public: + GenericCounter() : _mutex("GenericCounter") { } + void hit( const string& name , int count=0 ); + BSONObj getObj(); + private: + map _counts; // TODO: replace with thread safe map + mongo::mutex _mutex; + }; } diff -Nru mongodb-1.4.4/db/stats/fine_clock.h mongodb-1.6.3/db/stats/fine_clock.h --- mongodb-1.4.4/db/stats/fine_clock.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/stats/fine_clock.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,66 @@ +// fine_clock.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef DB_STATS_FINE_CLOCK_HEADER +#define DB_STATS_FINE_CLOCK_HEADER + +#include // struct timespec + +namespace mongo { + + /** + * This is a nano-second precision clock. We're skipping the + * harware TSC in favor of clock_gettime() which in some systems + * does not involve a trip to the OS (VDSO). + * + * We're exporting a type WallTime that is and should remain + * opaque. The business of getting accurate time is still ongoing + * and we may change the internal representation of this class. + * (http://lwn.net/Articles/388188/) + * + * Really, you shouldn't be using this class in hot code paths for + * platforms you're not sure whether the overhead is low. 
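For orientation: FineClock, declared just below, is meant to be used pairwise; a minimal usage sketch under the same assumption the header makes (Linux with clock_gettime available):

    FineClock::WallTime t0 = FineClock::now();
    // ... the code being timed ...
    FineClock::WallTime t1 = FineClock::now();
    uint64_t elapsedNanos = FineClock::diffInNanos( t1, t0 );  // end first, then start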
+ */ + class FineClock{ + public: + + typedef timespec WallTime; + + static WallTime now(){ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts; + } + + static uint64_t diffInNanos( WallTime end, WallTime start ){ + uint64_t diff; + if ( end.tv_nsec < start.tv_nsec ){ + diff = 1000000000 * ( end.tv_sec - start.tv_sec - 1); + diff += 1000000000 + end.tv_nsec - start.tv_nsec; + } else { + diff = 1000000000 * ( end.tv_sec - start.tv_sec ); + diff += end.tv_nsec - start.tv_nsec; + } + return diff; + } + + }; +} + +#endif // DB_STATS_FINE_CLOCK_HEADER + diff -Nru mongodb-1.4.4/db/stats/service_stats.cpp mongodb-1.6.3/db/stats/service_stats.cpp --- mongodb-1.4.4/db/stats/service_stats.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/stats/service_stats.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,68 @@ +// service_stats.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include + +#include "../../util/histogram.h" +#include "service_stats.h" + +namespace mongo { + + using std::ostringstream; + + ServiceStats::ServiceStats(){ + // Time histogram covers up to 128msec in exponential intervals + // starting at 125usec. + Histogram::Options timeOpts; + timeOpts.numBuckets = 12; + timeOpts.bucketSize = 125; + timeOpts.exponential = true; + _timeHistogram = new Histogram( timeOpts ); + + // Space histogram covers up to 1MB in exponentialintervals starting + // at 1K. + Histogram::Options spaceOpts; + spaceOpts.numBuckets = 12; + spaceOpts.bucketSize = 1024; + spaceOpts.exponential = true; + _spaceHistogram = new Histogram( spaceOpts ); + } + + ServiceStats::~ServiceStats(){ + delete _timeHistogram; + delete _spaceHistogram; + } + + void ServiceStats::logResponse( uint64_t duration, uint64_t bytes ){ + _spinLock.lock(); + _timeHistogram->insert( duration / 1000 /* in usecs */ ); + _spaceHistogram->insert( bytes ); + _spinLock.unlock(); + } + + string ServiceStats::toHTML() const { + ostringstream res ; + res << "Cumulative wire stats\n" + << "Response times\n" << _timeHistogram->toHTML() + << "Response sizes\n" << _spaceHistogram->toHTML() + << '\n'; + + return res.str(); + } + +} // mongo diff -Nru mongodb-1.4.4/db/stats/service_stats.h mongodb-1.6.3/db/stats/service_stats.h --- mongodb-1.4.4/db/stats/service_stats.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/db/stats/service_stats.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,66 @@ +// service_stats.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef DB_STATS_SERVICE_STATS_HEADER +#define DB_STATS_SERVICE_STATS_HEADER + +#include + +#include "../../util/concurrency/spin_lock.h" + +namespace mongo { + + using std::string; + + class Histogram; + + /** + * ServiceStats keeps track of the time a request/response message + * took inside a service as well as the size of the response + * generated. + */ + class ServiceStats { + public: + ServiceStats(); + ~ServiceStats(); + + /** + * Record the 'duration' in microseconds a request/response + * message took and the size in bytes of the generated + * response. + */ + void logResponse( uint64_t duration, uint64_t bytes ); + + /** + * Render the histogram as string that can be used inside an + * HTML doc. + */ + string toHTML() const; + + private: + SpinLock _spinLock; // protects state below + Histogram* _timeHistogram; + Histogram* _spaceHistogram; + + ServiceStats( const ServiceStats& ); + ServiceStats operator=( const ServiceStats& ); + }; + +} // namespace mongo + +#endif // DB_STATS_SERVICE_STATS_HEADER diff -Nru mongodb-1.4.4/db/stats/snapshots.cpp mongodb-1.6.3/db/stats/snapshots.cpp --- mongodb-1.4.4/db/stats/snapshots.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/snapshots.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,10 +16,12 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "snapshots.h" #include "../client.h" #include "../clientcursor.h" +#include "../dbwebserver.h" +#include "../../util/mongoutils/html.h" /** handles snapshotting performance metrics and other such things @@ -55,7 +57,7 @@ } Snapshots::Snapshots(int n) - : _n(n) + : _lock("Snapshots"), _n(n) , _snapshots(new SnapshotData[n]) , _loc(0) , _stored(0) @@ -87,19 +89,16 @@ void Snapshots::outputLockInfoHTML( stringstream& ss ){ scoped_lock lk(_lock); - ss << "\n"; - ss << "\n"; - + ss << "\n
"; for ( int i=0; i" - << "
" - << "" - << "" - ; + unsigned e = (unsigned) d.elapsed() / 1000; + ss << (unsigned)(100*d.percentWriteLocked()); + if( e < 3900 || e > 4100 ) + ss << '(' << e / 1000.0 << "s)"; + ss << ' '; } - - ss << "
elapsed(ms)% write locked
" << ( d.elapsed() / 1000 ) << "" << (unsigned)(100*d.percentWriteLocked()) << "%
\n"; + ss << "\n"; } void SnapshotThread::run(){ @@ -122,8 +121,6 @@ log() << "cpu: elapsed:" << (elapsed/1000) <<" writelock: " << (int)(100*d.percentWriteLocked()) << "%" << endl; } - // TODO: this should really be somewhere else, like in a special ClientCursor thread - ClientCursor::idleTimeReport( (unsigned)(elapsed/1000) ); } prev = s; @@ -139,6 +136,89 @@ client.shutdown(); } + using namespace mongoutils::html; + + class WriteLockStatus : public WebStatusPlugin { + public: + WriteLockStatus() : WebStatusPlugin( "write lock" , 51 , "% time in write lock, by 4 sec periods" ){} + virtual void init(){} + + virtual void run( stringstream& ss ){ + statsSnapshots.outputLockInfoHTML( ss ); + + ss << ""; + ss << "write locked now: " << (dbMutex.info().isLocked() ? "true" : "false") << "\n"; + } + + } writeLockStatus; + + class DBTopStatus : public WebStatusPlugin { + public: + DBTopStatus() : WebStatusPlugin( "dbtop" , 50 , "(occurences|percent of elapsed)" ){} + + void display( stringstream& ss , double elapsed , const Top::UsageData& usage ){ + ss << ""; + ss << usage.count; + ss << ""; + double per = 100 * ((double)usage.time)/elapsed; + ss << setprecision(1) << fixed << per << "%"; + ss << ""; + } + + void display( stringstream& ss , double elapsed , const string& ns , const Top::CollectionData& data ){ + if ( ns != "GLOBAL" && data.total.count == 0 ) + return; + ss << "" << ns << ""; + + display( ss , elapsed , data.total ); + + display( ss , elapsed , data.readLock ); + display( ss , elapsed , data.writeLock ); + + display( ss , elapsed , data.queries ); + display( ss , elapsed , data.getmore ); + display( ss , elapsed , data.insert ); + display( ss , elapsed , data.update ); + display( ss , elapsed , data.remove ); + + ss << "\n"; + } + + void run( stringstream& ss ){ + auto_ptr delta = statsSnapshots.computeDelta(); + if ( ! delta.get() ) + return; + + ss << ""; + ss << "" + "" + "" + "" + "" + "" + "" + "" + ""; + ss << "\n"; + + display( ss , (double) delta->elapsed() , "GLOBAL" , delta->globalUsageDiff() ); + + Top::UsageMap usage = delta->collectionUsageDiff(); + for ( Top::UsageMap::iterator i=usage.begin(); i != usage.end(); i++ ){ + display( ss , (double) delta->elapsed() , i->first , i->second ); + } + + ss << "
"; + ss << a("http://www.mongodb.org/display/DOCS/Developer+FAQ#DeveloperFAQ-What%27sa%22namespace%22%3F", "namespace") << + "NStotalReadsWritesQueriesGetMoresInsertsUpdatesRemoves
"; + + } + + virtual void init(){} + } dbtopStatus; + Snapshots statsSnapshots; SnapshotThread snapshotThread; } diff -Nru mongodb-1.4.4/db/stats/snapshots.h mongodb-1.6.3/db/stats/snapshots.h --- mongodb-1.4.4/db/stats/snapshots.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/snapshots.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ */ #pragma once -#include "../../stdafx.h" +#include "../../pch.h" #include "../jsobj.h" #include "top.h" #include "../../util/background.h" @@ -103,6 +103,7 @@ class SnapshotThread : public BackgroundJob { public: + string name() { return "snapshot"; } void run(); }; diff -Nru mongodb-1.4.4/db/stats/top.cpp mongodb-1.6.3/db/stats/top.cpp --- mongodb-1.4.4/db/stats/top.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/top.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "top.h" #include "../../util/message.h" #include "../commands.h" @@ -96,10 +96,11 @@ case dbDelete: c.remove.inc( micros ); break; + case dbKillCursors: + break; case opReply: case dbMsg: - case dbKillCursors: - //log() << "unexpected op in Top::record: " << op << endl; + log() << "unexpected op in Top::record: " << op << endl; break; default: log() << "unknown op in Top::record: " << op << endl; @@ -148,14 +149,14 @@ class TopCmd : public Command { public: - TopCmd() : Command( "top" ){} + TopCmd() : Command( "top", true ){} - virtual bool slaveOk(){ return true; } - virtual bool adminOnly(){ return true; } - virtual LockType locktype(){ return READ; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream& help ) const { help << "usage by collection"; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ { BSONObjBuilder b( result.subobjStart( "totals" ) ); Top::global.append( b ); @@ -175,7 +176,7 @@ TopOld::UsageMap TopOld::_snapshotB; TopOld::UsageMap &TopOld::_snapshot = TopOld::_snapshotA; TopOld::UsageMap &TopOld::_nextSnapshot = TopOld::_snapshotB; - mongo::mutex TopOld::topMutex; + mongo::mutex TopOld::topMutex("topMutex"); } diff -Nru mongodb-1.4.4/db/stats/top.h mongodb-1.6.3/db/stats/top.h --- mongodb-1.4.4/db/stats/top.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/stats/top.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ #include #undef assert -#define assert xassert +#define assert MONGO_assert namespace mongo { @@ -29,6 +29,8 @@ class Top { public: + Top() : _lock("Top") { } + class UsageData { public: UsageData() : time(0) , count(0){} diff -Nru mongodb-1.4.4/db/storage.cpp mongodb-1.6.3/db/storage.cpp --- mongodb-1.4.4/db/storage.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/storage.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,28 +16,30 @@ */ -#include "stdafx.h" +#include "pch.h" #include "pdfile.h" -#include "reccache.h" +//#include "reccache.h" #include "rec.h" #include "db.h" namespace mongo { -void writerThread(); - -#if defined(_RECSTORE) - static int inited; -#endif - // pick your store for indexes by setting this typedef // this doesn't need to be an ifdef, we can make it dynamic #if defined(_RECSTORE) RecStoreInterface *btreeStore = new CachedBasicRecStore(); #else -RecStoreInterface *btreeStore = new MongoMemMapped_RecStore(); 
+MongoMemMapped_RecStore *btreeStore = new MongoMemMapped_RecStore(); #endif +#if 0 + +#if defined(_RECSTORE) + static int inited; +#endif + +void writerThread(); + void BasicRecStore::init(const char *fn, unsigned recsize) { massert( 10394 , "compile packing problem recstore?", sizeof(RecStoreHeader) == 8192); @@ -74,4 +76,6 @@ #endif } +#endif + } diff -Nru mongodb-1.4.4/db/tests.cpp mongodb-1.6.3/db/tests.cpp --- mongodb-1.4.4/db/tests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/tests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ unit test & such */ -#include "stdafx.h" +#include "pch.h" #include "../util/mmap.h" namespace mongo { diff -Nru mongodb-1.4.4/db/update.cpp mongodb-1.6.3/db/update.cpp --- mongodb-1.4.4/db/update.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/update.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,13 +16,14 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "query.h" #include "pdfile.h" #include "jsobjmanipulator.h" #include "queryoptimizer.h" #include "repl.h" #include "update.h" +#include "btree.h" //#define DEBUGUPDATE(x) cout << x << endl; #define DEBUGUPDATE(x) @@ -236,7 +237,7 @@ } while( i.more() ) { - bb.appendAs( i.next() , bb.numStr( n - 1 ).c_str() ); + bb.appendAs( i.next() , bb.numStr( n - 1 ) ); n++; } } @@ -306,8 +307,10 @@ } auto_ptr ModSet::prepare(const BSONObj &obj) const { + DEBUGUPDATE( "\t start prepare" ); ModSetState * mss = new ModSetState( obj ); - + + // Perform this check first, so that we don't leave a partially modified object on uassert. for ( ModHolder::const_iterator i = _mods.begin(); i != _mods.end(); ++i ) { DEBUGUPDATE( "\t\t prepare : " << i->first ); @@ -407,9 +410,41 @@ mss->amIInPlacePossible( false ); } } + + DEBUGUPDATE( "\t mss\n" << mss->toString() << "\t--" ); return auto_ptr( mss ); } + + void ModState::appendForOpLog( BSONObjBuilder& b ) const { + if ( incType ){ + DEBUGUPDATE( "\t\t\t\t\t appendForOpLog inc fieldname: " << m->fieldName << " short:" << m->shortFieldName ); + BSONObjBuilder bb( b.subobjStart( "$set" ) ); + appendIncValue( bb , true ); + bb.done(); + return; + } + + const char * name = fixedOpName ? fixedOpName : Mod::modNames[op()]; + + DEBUGUPDATE( "\t\t\t\t\t appendForOpLog name:" << name << " fixed: " << fixed << " fn: " << m->fieldName ); + + BSONObjBuilder bb( b.subobjStart( name ) ); + if ( fixed ) + bb.appendAs( *fixed , m->fieldName ); + else + bb.appendAs( m->elt , m->fieldName ); + bb.done(); + } + + string ModState::toString() const { + stringstream ss; + if ( fixedOpName ) + ss << " fixedOpName: " << fixedOpName; + if ( fixed ) + ss << " fixed: " << fixed; + return ss.str(); + } void ModSetState::applyModsInPlace() { for ( ModStateHolder::iterator i = _mods.begin(); i != _mods.end(); ++i ) { @@ -492,7 +527,7 @@ string field = root + e.fieldName(); FieldCompareResult cmp = compareDottedFieldNames( m->second.m->fieldName , field ); - DEBUGUPDATE( "\t\t\t" << field << "\t" << m->second.m->fieldName << "\t" << cmp ); + DEBUGUPDATE( "\t\t\t field:" << field << "\t mod:" << m->second.m->fieldName << "\t cmp:" << cmp << "\t short: " << e.fieldName() ); switch ( cmp ){ @@ -515,18 +550,28 @@ e = es.next(); m++; } + else { + // this is a very weird case + // have seen it in production, but can't reproduce + // this assert prevents an inf. 
loop + // but likely isn't the correct solution + assert(0); + } continue; } case LEFT_BEFORE: // Mod on a field that doesn't exist + DEBUGUPDATE( "\t\t\t\t creating new field for: " << m->second.m->fieldName ); _appendNewFromMods( root , m->second , b , onedownseen ); m++; continue; case SAME: + DEBUGUPDATE( "\t\t\t\t applying mod on: " << m->second.m->fieldName ); m->second.apply( b , e ); e = es.next(); m++; continue; case RIGHT_BEFORE: // field that doesn't have a MOD + DEBUGUPDATE( "\t\t\t\t just copying" ); b.append( e ); // if array, ignore field name e = es.next(); continue; @@ -540,12 +585,14 @@ // finished looping the mods, just adding the rest of the elements while ( e.type() ){ + DEBUGUPDATE( "\t\t\t copying: " << e.fieldName() ); b.append( e ); // if array, ignore field name e = es.next(); } // do mods that don't have fields already for ( ; m != mend; m++ ){ + DEBUGUPDATE( "\t\t\t\t appending from mod at end: " << m->second.m->fieldName ); _appendNewFromMods( root , m->second , b , onedownseen ); } } @@ -556,6 +603,14 @@ return b.obj(); } + string ModSetState::toString() const { + stringstream ss; + for ( ModStateHolder::const_iterator i=_mods.begin(); i!=_mods.end(); ++i ){ + ss << "\t\t" << i->first << "\t" << i->second.toString() << "\n"; + } + return ss.str(); + } + BSONObj ModSet::createNewFromQuery( const BSONObj& query ){ BSONObj newObj; @@ -565,6 +620,8 @@ BSONObjIteratorSorted i( query ); while ( i.more() ){ BSONElement e = i.next(); + if ( e.fieldName()[0] == '$' ) // for $atomic and anything else we add + continue; if ( e.type() == Object && e.embeddedObject().firstElement().fieldName()[0] == '$' ){ // this means this is a $gt type filter, so don't make part of the new object @@ -610,6 +667,7 @@ uassert( 10147 , "Invalid modifier specified" + string( fn ), e.type() == Object ); BSONObj j = e.embeddedObject(); + DEBUGUPDATE( "\t" << j ); BSONObjIterator jt(j); Mod::Op op = opFromStr( fn ); @@ -622,7 +680,7 @@ uassert( 10148 , "Mod on _id not allowed", strcmp( fieldName, "_id" ) != 0 ); uassert( 10149 , "Invalid mod field name, may not end in a period", fieldName[ strlen( fieldName ) - 1 ] != '.' ); uassert( 10150 , "Field name duplication not allowed with modifiers", ! haveModForField( fieldName ) ); - uassert( 10151 , "have conflict mod" , ! haveConflictingMod( fieldName ) ); + uassert( 10151 , "have conflicting mods in update" , ! haveConflictingMod( fieldName ) ); uassert( 10152 , "Modifier $inc allowed for numbers only", f.isNumber() || op != Mod::INC ); uassert( 10153 , "Modifier $pushAll/pullAll allowed for arrays only", f.type() == Array || ( op != Mod::PUSH_ALL && op != Mod::PULL_ALL ) ); @@ -639,7 +697,7 @@ _mods[m.fieldName] = m; - DEBUGUPDATE( "\t\t " << fieldName << "\t" << _hasDynamicArray ); + DEBUGUPDATE( "\t\t " << fieldName << "\t" << m.fieldName << "\t" << _hasDynamicArray ); } } @@ -677,54 +735,152 @@ } } - class UpdateOp : public QueryOp { + class UpdateOp : public MultiCursor::CursorOp { public: - UpdateOp() : _nscanned() {} - virtual void init() { - BSONObj pattern = qp().query(); - _c.reset( qp().newCursor().release() ); - if ( ! _c->ok() ) + UpdateOp( bool hasPositionalField ) : _nscanned(), _hasPositionalField( hasPositionalField ){} + virtual void _init() { + _c = qp().newCursor(); + if ( ! _c->ok() ) { setComplete(); - else - _matcher.reset( new CoveredIndexMatcher( pattern, qp().indexKey() ) ); + } } + virtual bool prepareToYield() { + if ( ! 
_cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , qp().ns() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + virtual void recoverFromYield() { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + massert( 13339, "cursor dropped during update", false ); + } + } virtual void next() { if ( ! _c->ok() ) { setComplete(); return; } _nscanned++; - if ( _matcher->matches(_c->currKey(), _c->currLoc(), &_details ) ) { + if ( matcher()->matches(_c->currKey(), _c->currLoc(), &_details ) ) { setComplete(); return; } _c->advance(); } - bool curMatches(){ - return _matcher->matches(_c->currKey(), _c->currLoc() , &_details ); - } + virtual bool mayRecordPlan() const { return false; } - virtual QueryOp *clone() const { - return new UpdateOp(); + virtual QueryOp *_createChild() const { + return new UpdateOp( _hasPositionalField ); } - shared_ptr< Cursor > c() { return _c; } - long long nscanned() const { return _nscanned; } - MatchDetails& getMatchDetails(){ return _details; } + // already scanned to the first match, so return _c + virtual shared_ptr< Cursor > newCursor() const { return _c; } + virtual bool alwaysUseRecord() const { return _hasPositionalField; } private: shared_ptr< Cursor > _c; long long _nscanned; - auto_ptr< CoveredIndexMatcher > _matcher; + bool _hasPositionalField; MatchDetails _details; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; }; - - UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug ) { + static void checkTooLarge(const BSONObj& newObj) { + uassert( 12522 , "$ operator made object too large" , newObj.objsize() <= ( 4 * 1024 * 1024 ) ); + } + + /* note: this is only (as-is) called for + + - not multi + - not mods is indexed + - not upsert + */ + static UpdateResult _updateById(bool isOperatorUpdate, int idIdxNo, ModSet *mods, int profile, NamespaceDetails *d, + NamespaceDetailsTransient *nsdt, + bool god, const char *ns, + const BSONObj& updateobj, BSONObj patternOrig, bool logop, OpDebug& debug) + { + DiskLoc loc; + { + IndexDetails& i = d->idx(idIdxNo); + BSONObj key = i.getKeyFromQuery( patternOrig ); + loc = i.head.btree()->findSingle(i, i.head, key); + if( loc.isNull() ) { + // no upsert support in _updateById yet, so we are done. + return UpdateResult(0, 0, 0); + } + } + + Record *r = loc.rec(); + + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some + regular ones at the moment. 
*/ + if ( isOperatorUpdate ) { + const BSONObj& onDisk = loc.obj(); + auto_ptr mss = mods->prepare( onDisk ); + + if( mss->canApplyInPlace() ) { + mss->applyModsInPlace(); + DEBUGUPDATE( "\t\t\t updateById doing in place update" ); + /*if ( profile ) + ss << " fastmod "; */ + } + else { + BSONObj newObj = mss->createNewFromMods(); + checkTooLarge(newObj); + bool changedId; + assert(nsdt); + DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug, changedId); + } + + if ( logop ) { + DEV assert( mods->size() ); + + BSONObj pattern = patternOrig; + if ( mss->haveArrayDepMod() ) { + BSONObjBuilder patternBuilder; + patternBuilder.appendElements( pattern ); + mss->appendSizeSpecForArrayDepMods( patternBuilder ); + pattern = patternBuilder.obj(); + } + + if( mss->needOpLogRewrite() ) { + DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); + logOp("u", ns, mss->getOpLogRewrite() , &pattern ); + } + else { + logOp("u", ns, updateobj, &pattern ); + } + } + return UpdateResult( 1 , 1 , 1); + } // end $operator update + + // regular update + BSONElementManipulator::lookForTimestamps( updateobj ); + checkNoMods( updateobj ); + bool changedId = false; + assert(nsdt); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, changedId); + if ( logop ) { + if ( !changedId ) { + logOp("u", ns, updateobj, &patternOrig ); + } else { + logOp("d", ns, patternOrig ); + logOp("i", ns, updateobj ); + } + } + return UpdateResult( 1 , 0 , 1 ); + } + + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs ) { DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi ); - int profile = cc().database()->profile; + Client& client = cc(); + int profile = client.database()->profile; StringBuilder& ss = debug.str; if ( logLevel > 2 ) - ss << " update: " << updateobj; + ss << " update: " << updateobj.toString(); /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */ /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! 
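For orientation: the refresh-after-yield note above is honored in the hunks below with a periodic-yield pattern: every 256 records scanned (64 in the post-modification path), and only when the query is not $atomic, the cursor is parked in a ClientCursor so the write lock can be released briefly. A condensed restatement of that pattern as it appears below:

    if ( nscanned % 256 == 0 && ! atomic ){
        if ( cc.get() == 0 ) {
            shared_ptr< Cursor > cPtr = c;
            cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
        }
        if ( ! cc->yield() ){   // cursor invalidated while the lock was released
            cc.release();
            break;
        }
        if ( !c->ok() )
            break;
    }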
*/ @@ -732,12 +888,6 @@ NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns); /* end note */ - uassert( 10155 , "cannot update reserved $ collection", strchr(ns, '$') == 0 ); - if ( strstr(ns, ".system.") ) { - /* dm: it's very important that system.indexes is never updated as IndexDetails has pointers into it */ - uassert( 10156 , "cannot update system collection", legalClientSystemNS( ns , true ) ); - } - auto_ptr mods; bool isOperatorUpdate = updateobj.firstElement().fieldName()[0] == '$'; int modsIsIndexed = false; // really the # of indexes @@ -753,31 +903,63 @@ modsIsIndexed = mods->isIndexed(); } + if( !upsert && !multi && isSimpleIdQuery(patternOrig) && d && !modsIsIndexed ) { + int idxNo = d->findIdIndex(); + if( idxNo >= 0 ) { + ss << " byid "; + return _updateById(isOperatorUpdate, idxNo, mods.get(), profile, d, nsdt, god, ns, updateobj, patternOrig, logop, debug); + } + } + set seenObjects; - QueryPlanSet qps( ns, patternOrig, BSONObj() ); - UpdateOp original; - shared_ptr< UpdateOp > u = qps.runOp( original ); - massert( 10401 , u->exceptionMessage(), u->complete() ); - shared_ptr< Cursor > c = u->c(); int numModded = 0; + long long nscanned = 0; + MatchDetails details; + shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) ); + shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) ); + + auto_ptr cc; + while ( c->ok() ) { - if ( numModded > 0 && ! u->curMatches() ){ + nscanned++; + + bool atomic = c->matcher()->docMatcher().atomic(); + + // May have already matched in UpdateOp, but do again to get details set correctly + if ( ! c->matcher()->matches( c->currKey(), c->currLoc(), &details ) ){ c->advance(); + + if ( nscanned % 256 == 0 && ! atomic ){ + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + if ( ! cc->yield() ){ + cc.release(); + // TODO should we assert or something? + break; + } + if ( !c->ok() ) { + break; + } + } continue; } + Record *r = c->_current(); DiskLoc loc = c->currLoc(); - + + // TODO Maybe this is unnecessary since we have seenObjects if ( c->getsetdup( loc ) ){ c->advance(); continue; } - + BSONObj js(r); - + BSONObj pattern = patternOrig; - + if ( logop ) { BSONObjBuilder idPattern; BSONElement id; @@ -793,43 +975,47 @@ uassert( 10157 , "multi-update requires all modified objects to have an _id" , ! multi ); } } - + if ( profile ) - ss << " nscanned:" << u->nscanned(); - + ss << " nscanned:" << nscanned; + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some - regular ones at the moment. */ + regular ones at the moment. */ if ( isOperatorUpdate ) { - + if ( multi ){ c->advance(); // go to next record in case this one moves if ( seenObjects.count( loc ) ) continue; } - + const BSONObj& onDisk = loc.obj(); - + ModSet * useMods = mods.get(); bool forceRewrite = false; - + auto_ptr mymodset; - if ( u->getMatchDetails().elemMatchKey && mods->hasDynamicArray() ){ - useMods = mods->fixDynamicArray( u->getMatchDetails().elemMatchKey ); + if ( details.elemMatchKey && mods->hasDynamicArray() ){ + useMods = mods->fixDynamicArray( details.elemMatchKey ); mymodset.reset( useMods ); forceRewrite = true; } - - + auto_ptr mss = useMods->prepare( onDisk ); - + bool indexHack = multi && ( modsIsIndexed || ! 
mss->canApplyInPlace() ); - - if ( indexHack ) - c->noteLocation(); - + + if ( indexHack ){ + if ( cc.get() ) + cc->updateLocation(); + else + c->noteLocation(); + } + if ( modsIsIndexed <= 0 && mss->canApplyInPlace() ){ mss->applyModsInPlace();// const_cast(onDisk) ); - + + DEBUGUPDATE( "\t\t\t doing in place update" ); if ( profile ) ss << " fastmod "; @@ -838,26 +1024,30 @@ } } else { + if ( rs ) + rs->goingToDelete( onDisk ); + BSONObj newObj = mss->createNewFromMods(); - uassert( 12522 , "$ operator made object too large" , newObj.objsize() <= ( 4 * 1024 * 1024 ) ); - DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug); + checkTooLarge(newObj); + bool changedId; + DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug, changedId); if ( newLoc != loc || modsIsIndexed ) { // object moved, need to make sure we don' get again seenObjects.insert( newLoc ); } } - + if ( logop ) { DEV assert( mods->size() ); - + if ( mss->haveArrayDepMod() ) { BSONObjBuilder patternBuilder; patternBuilder.appendElements( pattern ); mss->appendSizeSpecForArrayDepMods( patternBuilder ); pattern = patternBuilder.obj(); } - + if ( forceRewrite || mss->needOpLogRewrite() ){ DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); logOp("u", ns, mss->getOpLogRewrite() , &pattern ); @@ -868,19 +1058,42 @@ } numModded++; if ( ! multi ) - break; + return UpdateResult( 1 , 1 , numModded ); if ( indexHack ) c->checkLocation(); + + if ( nscanned % 64 == 0 && ! atomic ){ + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + if ( ! cc->yield() ){ + cc.release(); + break; + } + if ( !c->ok() ) { + break; + } + } + continue; } - + uassert( 10158 , "multi update only works with $ operators" , ! multi ); - + BSONElementManipulator::lookForTimestamps( updateobj ); checkNoMods( updateobj ); - theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug); - if ( logop ) - logOp("u", ns, updateobj, &pattern ); + bool changedId = false; + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, changedId, god); + if ( logop ) { + DEV if( god ) log() << "REALLY??" << endl; // god doesn't get logged, this would be bad. + if ( !changedId ) { + logOp("u", ns, updateobj, &pattern ); + } else { + logOp("d", ns, pattern ); + logOp("i", ns, updateobj ); + } + } return UpdateResult( 1 , 0 , 1 ); } @@ -889,7 +1102,7 @@ if ( profile ) - ss << " nscanned:" << u->nscanned(); + ss << " nscanned:" << nscanned; if ( upsert ) { if ( updateobj.firstElement().fieldName()[0] == '$' ) { @@ -897,24 +1110,32 @@ BSONObj newObj = mods->createNewFromQuery( patternOrig ); if ( profile ) ss << " fastmodinsert "; - theDataFileMgr.insert(ns, newObj); - if ( profile ) - ss << " fastmodinsert "; + theDataFileMgr.insertWithObjMod(ns, newObj, god); if ( logop ) logOp( "i", ns, newObj ); - return UpdateResult( 0 , 1 , 1 ); + + return UpdateResult( 0 , 1 , 1 , newObj ); } uassert( 10159 , "multi update only works with $ operators" , ! 
multi ); checkNoMods( updateobj ); if ( profile ) ss << " upsert "; BSONObj no = updateobj; - theDataFileMgr.insert(ns, no); + theDataFileMgr.insertWithObjMod(ns, no, god); if ( logop ) logOp( "i", ns, no ); - return UpdateResult( 0 , 0 , 1 ); + return UpdateResult( 0 , 0 , 1 , no ); } return UpdateResult( 0 , 0 , 0 ); } - + + UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug ) { + uassert( 10155 , "cannot update reserved $ collection", strchr(ns, '$') == 0 ); + if ( strstr(ns, ".system.") ) { + /* dm: it's very important that system.indexes is never updated as IndexDetails has pointers into it */ + uassert( 10156 , "cannot update system collection", legalClientSystemNS( ns , true ) ); + } + return _updateObjects(false, ns, updateobj, patternOrig, upsert, multi, logop, debug); + } + } diff -Nru mongodb-1.4.4/db/update.h mongodb-1.6.3/db/update.h --- mongodb-1.4.4/db/update.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/db/update.h 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "../stdafx.h" +#include "../pch.h" #include "jsobj.h" #include "../util/embedded_builder.h" #include "matcher.h" @@ -129,6 +129,15 @@ StringBuilder buf( fullName.size() + 1 ); for ( size_t i=0; i 0 && fullName[i-1] == '.' && + i+1fieldName ); - else - bb.appendAs( m->elt , m->fieldName ); - bb.done(); - } + void appendForOpLog( BSONObjBuilder& b ) const; template< class Builder > void apply( Builder& b , BSONElement in ){ @@ -436,7 +429,7 @@ template< class Builder > void appendIncValue( Builder& b , bool useFullName ) const { const char * n = useFullName ? m->fieldName : m->shortFieldName; - + switch ( incType ){ case NumberDouble: b.append( n , incdouble ); break; @@ -448,6 +441,8 @@ assert(0); } } + + string toString() const; }; /** @@ -578,6 +573,7 @@ } } + string toString() const; friend class ModSet; }; diff -Nru mongodb-1.4.4/dbtests/basictests.cpp mongodb-1.6.3/dbtests/basictests.cpp --- mongodb-1.4.4/dbtests/basictests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/basictests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,11 +17,12 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "dbtests.h" #include "../util/base64.h" #include "../util/array.h" +#include "../util/text.h" namespace BasicTests { @@ -186,6 +187,7 @@ class sleeptest { public: + void run(){ Timer t; sleepsecs( 1 ); @@ -199,8 +201,45 @@ t.reset(); sleepmillis( 1727 ); ASSERT( t.millis() >= 1000 ); - ASSERT( t.millis() <= 2000 ); + ASSERT( t.millis() <= 2500 ); + + { + int total = 1200; + int ms = 2; + t.reset(); + for ( int i=0; i<(total/ms); i++ ){ + sleepmillis( ms ); + } + { + int x = t.millis(); + if ( x < 1000 || x > 2500 ){ + cout << "sleeptest x: " << x << endl; + ASSERT( x >= 1000 ); + ASSERT( x <= 20000 ); + } + } + } +#ifdef __linux__ + { + int total = 1200; + int micros = 100; + t.reset(); + int numSleeps = 1000*(total/micros); + for ( int i=0; i 2500 ){ + cout << "sleeptest y: " << y << endl; + ASSERT( y >= 1000 ); + /* ASSERT( y <= 100000 ); */ + } + } + } +#endif + } }; @@ -220,7 +259,9 @@ } void run(){ uassert( -1 , foo() , 1 ); - ASSERT_EQUALS( 0 , x ); + if( x != 0 ) { + ASSERT_EQUALS( 0 , x ); + } try { uassert( -1 , foo() , 0 ); } @@ -268,7 +309,7 @@ { ThreadSafeString bar; bar = "eliot2"; - foo = bar; + foo = bar.toString(); } ASSERT_EQUALS( "eliot2" , foo ); } @@ -316,7 +357,111 @@ ASSERT_EQUALS( -1 , lexNumCmp( "a.0.b" , "a.1" ) ); } }; + + class DatabaseValidNames { + public: + void run(){ + ASSERT( Database::validDBName( "foo" ) ); + ASSERT( ! Database::validDBName( "foo/bar" ) ); + ASSERT( ! Database::validDBName( "foo.bar" ) ); + + ASSERT( nsDollarCheck( "asdads" ) ); + ASSERT( ! nsDollarCheck( "asda$ds" ) ); + ASSERT( nsDollarCheck( "local.oplog.$main" ) ); + } + }; + class PtrTests { + public: + void run(){ + scoped_ptr p1 (new int(1)); + boost::shared_ptr p2 (new int(2)); + scoped_ptr p3 (new int(3)); + boost::shared_ptr p4 (new int(4)); + + //non-const + ASSERT_EQUALS( p1.get() , ptr(p1) ); + ASSERT_EQUALS( p2.get() , ptr(p2) ); + ASSERT_EQUALS( p2.get() , ptr(p2.get()) ); // T* constructor + ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); // copy constructor + ASSERT_EQUALS( *p2 , *ptr(p2)); + ASSERT_EQUALS( p2.get() , ptr >(&p2)->get() ); // operator-> + + //const + ASSERT_EQUALS( p1.get() , ptr(p1) ); + ASSERT_EQUALS( p2.get() , ptr(p2) ); + ASSERT_EQUALS( p2.get() , ptr(p2.get()) ); + ASSERT_EQUALS( p3.get() , ptr(p3) ); + ASSERT_EQUALS( p4.get() , ptr(p4) ); + ASSERT_EQUALS( p4.get() , ptr(p4.get()) ); + ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); + ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); // constizing copy constructor + ASSERT_EQUALS( *p2 , *ptr(p2)); + ASSERT_EQUALS( p2.get() , ptr >(&p2)->get() ); + + //bool context + ASSERT( ptr(p1) ); + ASSERT( !ptr(NULL) ); + ASSERT( !ptr() ); + +#if 0 + // These shouldn't compile + ASSERT_EQUALS( p3.get() , ptr(p3) ); + ASSERT_EQUALS( p4.get() , ptr(p4) ); + ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); +#endif + } + }; + + struct StringSplitterTest { + + void test( string s ){ + vector v = StringSplitter::split( s , "," ); + ASSERT_EQUALS( s , StringSplitter::join( v , "," ) ); + } + + void run(){ + test( "a" ); + test( "a,b" ); + test( "a,b,c" ); + } + }; + + struct IsValidUTF8Test { +// macros used to get valid line numbers +#define good(s) ASSERT(isValidUTF8(s)); +#define bad(s) ASSERT(!isValidUTF8(s)); + + void run() { + good("A"); + good("\xC2\xA2"); // cent: ¢ + good("\xE2\x82\xAC"); // euro: € + good("\xF0\x9D\x90\x80"); // Blackboard A: 𝐀 + + //abrupt end + bad("\xC2"); + bad("\xE2\x82"); + bad("\xF0\x9D\x90"); + bad("\xC2 "); + bad("\xE2\x82 "); + bad("\xF0\x9D\x90 "); + + //too 
long + bad("\xF8\x80\x80\x80\x80"); + bad("\xFC\x80\x80\x80\x80\x80"); + bad("\xFE\x80\x80\x80\x80\x80\x80"); + bad("\xFF\x80\x80\x80\x80\x80\x80\x80"); + + bad("\xF5\x80\x80\x80"); // U+140000 > U+10FFFF + bad("\x80"); //cant start with continuation byte + bad("\xC0\x80"); // 2-byte version of ASCII NUL +#undef good +#undef bad + } + }; + + + class All : public Suite { public: All() : Suite( "basic" ){ @@ -336,6 +481,13 @@ add< ArrayTests::basic1 >(); add< LexNumCmp >(); + + add< DatabaseValidNames >(); + + add< PtrTests >(); + + add< StringSplitterTest >(); + add< IsValidUTF8Test >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/btreetests.cpp mongodb-1.6.3/dbtests/btreetests.cpp --- mongodb-1.4.4/dbtests/btreetests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/btreetests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/db.h" #include "../db/btree.h" @@ -26,47 +26,47 @@ namespace BtreeTests { - class Base { + const char* ns() { + return "unittests.btreetests"; + } + + class Ensure { + public: + Ensure() { + _c.ensureIndex( ns(), BSON( "a" << 1 ), false, "testIndex" ); + } + ~Ensure() { + _c.dropIndexes( ns() ); + } + private: + DBDirectClient _c; + }; + + class Base : public Ensure { public: Base() : - _context( ns() ) { - + _context( ns() ) { { bool f = false; assert( f = true ); massert( 10402 , "assert is misdefined", f); } - BSONObjBuilder builder; - builder.append( "ns", ns() ); - builder.append( "name", "testIndex" ); - BSONObj bobj = builder.done(); - idx_.info = - theDataFileMgr.insert( ns(), bobj.objdata(), bobj.objsize() ); - idx_.head = BtreeBucket::addBucket( idx_ ); - } - ~Base() { - // FIXME cleanup all btree buckets. 
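For orientation: the FIXME above goes away in 1.6.3 because the rewritten btree tests (next hunk) manage the test index with an RAII fixture instead of manual record deletion; condensed from the added code:

    class Ensure {
    public:
        Ensure()  { _c.ensureIndex( ns(), BSON( "a" << 1 ), false, "testIndex" ); }
        ~Ensure() { _c.dropIndexes( ns() ); }  // runs even when a test throws
    private:
        DBDirectClient _c;
    };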
- theDataFileMgr.deleteRecord( ns(), idx_.info.rec(), idx_.info ); - ASSERT( theDataFileMgr.findAll( ns() )->eof() ); } protected: - BtreeBucket* bt() const { - return idx_.head.btree(); + BtreeBucket* bt() { + return id().head.btree(); } - DiskLoc dl() const { - return idx_.head; + DiskLoc dl() { + return id().head; } IndexDetails& id() { - return idx_; - } - static const char* ns() { - return "unittests.btreetests"; + return nsdetails( ns() )->idx( 1 ); } // dummy, valid record loc static DiskLoc recordLoc() { return DiskLoc( 0, 2 ); } - void checkValid( int nKeys ) const { + void checkValid( int nKeys ) { ASSERT( bt() ); ASSERT( bt()->isHead() ); bt()->assertValid( order(), true ); @@ -76,7 +76,7 @@ bt()->dumpTree( dl(), order() ); } void insert( BSONObj &key ) { - bt()->bt_insert( dl(), recordLoc(), key, order(), true, id(), true ); + bt()->bt_insert( dl(), recordLoc(), key, Ordering::make(order()), true, id(), true ); } void unindex( BSONObj &key ) { bt()->unindex( dl(), id(), key, recordLoc() ); @@ -93,18 +93,17 @@ int pos; bool found; DiskLoc location = - bt()->locate( id(), dl(), key, order(), pos, found, recordLoc(), direction ); + bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); ASSERT_EQUALS( expectedFound, found ); ASSERT( location == expectedLocation ); ASSERT_EQUALS( expectedPos, pos ); } - BSONObj order() const { - return idx_.keyPattern(); + BSONObj order() { + return id().keyPattern(); } private: dblock lk_; Client::Context _context; - IndexDetails idx_; }; class Create : public Base { @@ -251,6 +250,122 @@ Base::insert( k ); } }; + + class ReuseUnused : public Base { + public: + void run() { + for ( int i = 0; i < 10; ++i ) { + insert( i ); + } + BSONObj root = key( 'p' ); + unindex( root ); + Base::insert( root ); + locate( root, 0, true, dl(), 1 ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + }; + + class PackUnused : public Base { + public: + void run() { + for ( long long i = 0; i < 1000000; i += 1000 ) { + insert( i ); + } + string orig, after; + { + stringstream ss; + bt()->shape( ss ); + orig = ss.str(); + } + vector< string > toDel; + vector< string > other; + BSONObjBuilder start; + start.appendMinKey( "a" ); + BSONObjBuilder end; + end.appendMaxKey( "a" ); + auto_ptr< BtreeCursor > c( new BtreeCursor( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); + while( c->ok() ) { + if ( !c->currKeyNode().prevChildBucket.isNull() ) { + toDel.push_back( c->currKey().firstElement().valuestr() ); + } else { + other.push_back( c->currKey().firstElement().valuestr() ); + } + c->advance(); + } + ASSERT( toDel.size() > 0 ); + for( vector< string >::const_iterator i = toDel.begin(); i != toDel.end(); ++i ) { + BSONObj o = BSON( "a" << *i ); + unindex( o ); + } + ASSERT( other.size() > 0 ); + for( vector< string >::const_iterator i = other.begin(); i != other.end(); ++i ) { + BSONObj o = BSON( "a" << *i ); + unindex( o ); + } + + int unused = 0; + ASSERT_EQUALS( 0, bt()->fullValidate( dl(), order(), &unused ) ); + + for ( long long i = 50000; i < 50100; ++i ) { + insert( i ); + } + + int unused2 = 0; + ASSERT_EQUALS( 100, bt()->fullValidate( dl(), order(), &unused2 ) ); + + ASSERT( unused2 < unused ); + } + protected: + void insert( long long n ) { + string val( 800, ' ' ); + for( int i = 0; i < 800; i += 8 ) { + for( int j = 0; j < 8; ++j ) { + // probably we won't get > 56 bits + unsigned char v = 0x80 
| ( n >> ( ( 8 - j - 1 ) * 7 ) & 0x000000000000007f ); + val[ i + j ] = v; + } + } + BSONObj k = BSON( "a" << val ); + Base::insert( k ); + } + }; + + class DontDropReferenceKey : public PackUnused { + public: + void run() { + // with 80 root node is full + for ( long long i = 0; i < 80; i += 1 ) { + insert( i ); + } + + BSONObjBuilder start; + start.appendMinKey( "a" ); + BSONObjBuilder end; + end.appendMaxKey( "a" ); + BSONObj l = bt()->keyNode( 0 ).key; + string toInsert; + auto_ptr< BtreeCursor > c( new BtreeCursor( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) ); + while( c->ok() ) { + if ( c->currKey().woCompare( l ) > 0 ) { + toInsert = c->currKey().firstElement().valuestr(); + break; + } + c->advance(); + } + // too much work to try to make this happen through inserts and deletes + const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) = DiskLoc(); + const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() |= 1; // make unused + BSONObj k = BSON( "a" << toInsert ); + Base::insert( k ); + } + }; class All : public Suite { public: @@ -265,6 +380,9 @@ add< MissingLocate >(); add< MissingLocateMultiBucket >(); add< SERVER983 >(); + add< ReuseUnused >(); + add< PackUnused >(); + add< DontDropReferenceKey >(); } } myall; } diff -Nru mongodb-1.4.4/dbtests/clienttests.cpp mongodb-1.6.3/dbtests/clienttests.cpp --- mongodb-1.4.4/dbtests/clienttests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/clienttests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ // client.cpp -#include "stdafx.h" +#include "pch.h" #include "../client/dbclient.h" #include "dbtests.h" #include "../db/concurrency.h" @@ -119,12 +119,17 @@ for( int i = 0; i < 10; ++i ) db.insert( ns(), BSON( "i" << i ) ); auto_ptr< DBClientCursor > c = db.query( ns(), Query().sort( BSON( "i" << 1 ) ) ); + BSONObj o = c->next(); ASSERT( c->more() ); + ASSERT_EQUALS( 9 , c->objsLeftInBatch() ); ASSERT( c->moreInCurrentBatch() ); + c->putBack( o ); ASSERT( c->more() ); + ASSERT_EQUALS( 10, c->objsLeftInBatch() ); ASSERT( c->moreInCurrentBatch() ); + o = c->next(); BSONObj o2 = c->next(); BSONObj o3 = c->next(); @@ -136,9 +141,12 @@ ASSERT_EQUALS( i, o[ "i" ].number() ); } ASSERT( !c->more() ); + ASSERT_EQUALS( 0, c->objsLeftInBatch() ); ASSERT( !c->moreInCurrentBatch() ); + c->putBack( o ); ASSERT( c->more() ); + ASSERT_EQUALS( 1, c->objsLeftInBatch() ); ASSERT( c->moreInCurrentBatch() ); ASSERT_EQUALS( 1, c->itcount() ); } @@ -153,7 +161,7 @@ ASSERT( db.runCommand( "unittests", BSON( "collstats" << "clienttests.create" ), info ) ); } }; - + class All : public Suite { public: All() : Suite( "client" ){ diff -Nru mongodb-1.4.4/dbtests/commandtests.cpp mongodb-1.6.3/dbtests/commandtests.cpp --- mongodb-1.4.4/dbtests/commandtests.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/dbtests/commandtests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,98 @@ +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
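The putBack()/objsLeftInBatch() assertions in the clienttests hunk above encode a simple contract: putting a document back re-queues it at the front of the current batch, so objsLeftInBatch() climbs from 9 back to 10 and the following next() returns the same object. A toy cursor with that contract (names are hypothetical; the real logic lives in DBClientCursor):

#include <cassert>
#include <stack>
#include <vector>

// Toy batch cursor with putBack semantics matching the assertions above.
template <class T>
class ToyCursor {
    std::vector<T> _batch;   // current batch, consumed front to back
    size_t _pos;             // next unread position
    std::stack<T> _putBack;  // documents pushed back by the caller
public:
    explicit ToyCursor( const std::vector<T>& batch ) : _batch(batch), _pos(0) {}
    int objsLeftInBatch() const { return int( _putBack.size() + _batch.size() - _pos ); }
    bool more() const { return objsLeftInBatch() > 0; }
    T next() {
        if ( !_putBack.empty() ) { T t = _putBack.top(); _putBack.pop(); return t; }
        return _batch[ _pos++ ];
    }
    void putBack( const T& t ) { _putBack.push( t ); } // re-queue at the front
};

int main() {
    std::vector<int> b;
    for ( int i = 0; i < 10; ++i ) b.push_back( i );
    ToyCursor<int> c( b );
    int o = c.next();
    assert( c.objsLeftInBatch() == 9 );
    c.putBack( o );
    assert( c.objsLeftInBatch() == 10 ); // the same climb the test asserts
    return 0;
}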
+ */ + +#include "pch.h" +#include "../client/dbclient.h" +#include "dbtests.h" +#include "../db/concurrency.h" + +using namespace mongo; + +namespace CommandTests { + // one namespace per command + namespace FileMD5{ + struct Base { + Base(){ + db.dropCollection(ns()); + db.ensureIndex(ns(), BSON( "files_id" << 1 << "n" << 1 )); + } + + const char* ns() { return "test.fs.chunks"; } + + DBDirectClient db; + }; + struct Type0 : Base { + void run(){ + { + BSONObjBuilder b; + b.genOID(); + b.append("files_id", 0); + b.append("n", 0); + b.appendBinData("data", 6, BinDataGeneral, "hello "); + db.insert(ns(), b.obj()); + } + { + BSONObjBuilder b; + b.genOID(); + b.append("files_id", 0); + b.append("n", 1); + b.appendBinData("data", 5, BinDataGeneral, "world"); + db.insert(ns(), b.obj()); + } + + BSONObj result; + ASSERT( db.runCommand("test", BSON("filemd5" << 0), result) ); + ASSERT_EQUALS( string("5eb63bbbe01eeed093cb22bb8f5acdc3") , result["md5"].valuestr() ); + } + }; + struct Type2 : Base{ + void run(){ + { + BSONObjBuilder b; + b.genOID(); + b.append("files_id", 0); + b.append("n", 0); + b.appendBinDataArrayDeprecated("data", "hello ", 6); + db.insert(ns(), b.obj()); + } + { + BSONObjBuilder b; + b.genOID(); + b.append("files_id", 0); + b.append("n", 1); + b.appendBinDataArrayDeprecated("data", "world", 5); + db.insert(ns(), b.obj()); + } + + BSONObj result; + ASSERT( db.runCommand("test", BSON("filemd5" << 0), result) ); + ASSERT_EQUALS( string("5eb63bbbe01eeed093cb22bb8f5acdc3") , result["md5"].valuestr() ); + } + }; + } + + class All : public Suite { + public: + All() : Suite( "commands" ){ + } + + void setupTests(){ + add< FileMD5::Type0 >(); + add< FileMD5::Type2 >(); + } + + } all; +} diff -Nru mongodb-1.4.4/dbtests/cursortests.cpp mongodb-1.6.3/dbtests/cursortests.cpp --- mongodb-1.4.4/dbtests/cursortests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/cursortests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/db.h" #include "../db/clientcursor.h" #include "../db/instance.h" @@ -28,7 +28,29 @@ namespace BtreeCursorTests { - class MultiRange { + // The ranges expressed in these tests are impossible given our query + // syntax, so going to do them a hacky way. 
+ + class Base { + protected: + FieldRangeVector *vec( int *vals, int len, int direction = 1 ) { + FieldRangeSet s( "", BSON( "a" << 1 ) ); + for( int i = 0; i < len; i += 2 ) { + _objs.push_back( BSON( "a" << BSON( "$gte" << vals[ i ] << "$lte" << vals[ i + 1 ] ) ) ); + FieldRangeSet s2( "", _objs.back() ); + if ( i == 0 ) { + s.range( "a" ) = s2.range( "a" ); + } else { + s.range( "a" ) |= s2.range( "a" ); + } + } + return new FieldRangeVector( s, BSON( "a" << 1 ), direction ); + } + private: + vector< BSONObj > _objs; + }; + + class MultiRange : public Base { public: void run() { dblock lk; @@ -39,11 +61,10 @@ c.insert( ns, BSON( "a" << i ) ); ASSERT( c.ensureIndex( ns, BSON( "a" << 1 ) ) ); } - BoundList b; - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 1 ), BSON( "" << 2 ) ) ); - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 4 ), BSON( "" << 6 ) ) ); + int v[] = { 1, 2, 4, 6 }; + boost::shared_ptr< FieldRangeVector > frv( vec( v, 4 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), b, 1 ); + BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ); ASSERT_EQUALS( "BtreeCursor a_1 multi", c.toString() ); double expected[] = { 1, 2, 4, 5, 6 }; for( int i = 0; i < 5; ++i ) { @@ -55,7 +76,7 @@ } }; - class MultiRangeGap { + class MultiRangeGap : public Base { public: void run() { dblock lk; @@ -68,12 +89,10 @@ c.insert( ns, BSON( "a" << i ) ); ASSERT( c.ensureIndex( ns, BSON( "a" << 1 ) ) ); } - BoundList b; - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << -50 ), BSON( "" << 2 ) ) ); - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 40 ), BSON( "" << 60 ) ) ); - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 109 ), BSON( "" << 200 ) ) ); + int v[] = { -50, 2, 40, 60, 109, 200 }; + boost::shared_ptr< FieldRangeVector > frv( vec( v, 6 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), b, 1 ); + BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, 1 ); ASSERT_EQUALS( "BtreeCursor a_1 multi", c.toString() ); double expected[] = { 0, 1, 2, 109 }; for( int i = 0; i < 4; ++i ) { @@ -85,7 +104,7 @@ } }; - class MultiRangeReverse { + class MultiRangeReverse : public Base { public: void run() { dblock lk; @@ -96,11 +115,10 @@ c.insert( ns, BSON( "a" << i ) ); ASSERT( c.ensureIndex( ns, BSON( "a" << 1 ) ) ); } - BoundList b; - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 6 ), BSON( "" << 4 ) ) ); - b.push_back( pair< BSONObj, BSONObj >( BSON( "" << 2 ), BSON( "" << 1 ) ) ); + int v[] = { 1, 2, 4, 6 }; + boost::shared_ptr< FieldRangeVector > frv( vec( v, 4, -1 ) ); Client::Context ctx( ns ); - BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), b, -1 ); + BtreeCursor c( nsdetails( ns ), 1, nsdetails( ns )->idx(1), frv, -1 ); ASSERT_EQUALS( "BtreeCursor a_1 reverse multi", c.toString() ); double expected[] = { 6, 5, 4, 2, 1 }; for( int i = 0; i < 5; ++i ) { @@ -112,6 +130,122 @@ } }; + class Base2 { + public: + virtual ~Base2() { _c.dropCollection( ns() ); } + protected: + static const char *ns() { return "unittests.cursortests.Base2"; } + DBDirectClient _c; + virtual BSONObj idx() const = 0; + virtual int direction() const { return 1; } + void insert( const BSONObj &o ) { + _objs.push_back( o ); + _c.insert( ns(), o ); + } + void check( const BSONObj &spec ) { + _c.ensureIndex( ns(), idx() ); + Client::Context ctx( ns() ); + FieldRangeSet frs( ns(), spec ); + boost::shared_ptr< FieldRangeVector > frv( new FieldRangeVector( frs, idx(), direction() ) ); + 
BtreeCursor c( nsdetails( ns() ), 1, nsdetails( ns() )->idx( 1 ), frv, direction() ); + Matcher m( spec ); + int count = 0; + while( c.ok() ) { + ASSERT( m.matches( c.current() ) ); + c.advance(); + ++count; + } + int expectedCount = 0; + for( vector< BSONObj >::const_iterator i = _objs.begin(); i != _objs.end(); ++i ) { + if ( m.matches( *i ) ) { + ++expectedCount; + } + } + ASSERT_EQUALS( expectedCount, count ); + } + private: + dblock _lk; + vector< BSONObj > _objs; + }; + + class EqEq : public Base2 { + public: + void run() { + insert( BSON( "a" << 4 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 4 ) ); + insert( BSON( "a" << 5 << "b" << 4 ) ); + check( BSON( "a" << 4 << "b" << 5 ) ); + } + virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } + }; + + class EqRange : public Base2 { + public: + void run() { + insert( BSON( "a" << 3 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 0 ) ); + insert( BSON( "a" << 4 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 6 ) ); + insert( BSON( "a" << 4 << "b" << 6 ) ); + insert( BSON( "a" << 4 << "b" << 10 ) ); + insert( BSON( "a" << 4 << "b" << 11 ) ); + insert( BSON( "a" << 5 << "b" << 5 ) ); + check( BSON( "a" << 4 << "b" << BSON( "$gte" << 1 << "$lte" << 10 ) ) ); + } + virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } + }; + + class EqIn : public Base2 { + public: + void run() { + insert( BSON( "a" << 3 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 0 ) ); + insert( BSON( "a" << 4 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 6 ) ); + insert( BSON( "a" << 4 << "b" << 6 ) ); + insert( BSON( "a" << 4 << "b" << 10 ) ); + insert( BSON( "a" << 4 << "b" << 11 ) ); + insert( BSON( "a" << 5 << "b" << 5 ) ); + check( BSON( "a" << 4 << "b" << BSON( "$in" << BSON_ARRAY( 5 << 6 << 11 ) ) ) ); + } + virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } + }; + + class RangeEq : public Base2 { + public: + void run() { + insert( BSON( "a" << 0 << "b" << 4 ) ); + insert( BSON( "a" << 1 << "b" << 4 ) ); + insert( BSON( "a" << 4 << "b" << 3 ) ); + insert( BSON( "a" << 5 << "b" << 4 ) ); + insert( BSON( "a" << 7 << "b" << 4 ) ); + insert( BSON( "a" << 4 << "b" << 4 ) ); + insert( BSON( "a" << 9 << "b" << 6 ) ); + insert( BSON( "a" << 11 << "b" << 1 ) ); + insert( BSON( "a" << 11 << "b" << 4 ) ); + check( BSON( "a" << BSON( "$gte" << 1 << "$lte" << 10 ) << "b" << 4 ) ); + } + virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } + }; + + class RangeIn : public Base2 { + public: + void run() { + insert( BSON( "a" << 0 << "b" << 4 ) ); + insert( BSON( "a" << 1 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 3 ) ); + insert( BSON( "a" << 5 << "b" << 4 ) ); + insert( BSON( "a" << 7 << "b" << 5 ) ); + insert( BSON( "a" << 4 << "b" << 4 ) ); + insert( BSON( "a" << 9 << "b" << 6 ) ); + insert( BSON( "a" << 11 << "b" << 1 ) ); + insert( BSON( "a" << 11 << "b" << 4 ) ); + check( BSON( "a" << BSON( "$gte" << 1 << "$lte" << 10 ) << "b" << BSON( "$in" << BSON_ARRAY( 4 << 6 ) ) ) ); + } + virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } + }; + } // namespace BtreeCursorTests class All : public Suite { @@ -122,6 +256,11 @@ add< BtreeCursorTests::MultiRange >(); add< BtreeCursorTests::MultiRangeGap >(); add< BtreeCursorTests::MultiRangeReverse >(); + add< BtreeCursorTests::EqEq >(); + add< BtreeCursorTests::EqRange >(); + add< BtreeCursorTests::EqIn >(); + add< BtreeCursorTests::RangeEq >(); + add< BtreeCursorTests::RangeIn >(); 
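Each Base2 case above follows one pattern: derive index bounds from the query, scan a BtreeCursor over them, assert every yielded document satisfies the Matcher, and compare the count against a brute-force pass over the inserted objects. Stripped of the btree machinery, the bounds are a product of per-field interval sets over the compound (a, b) index; a toy version of the invariant (hypothetical types, not FieldRangeVector itself):

#include <cassert>
#include <vector>

// One closed interval over a field -- the analogue of a FieldInterval.
struct Interval { int lo, hi; };
Interval iv( int lo, int hi ) { Interval r; r.lo = lo; r.hi = hi; return r; }
bool containsAny( const std::vector<Interval>& is, int v ) {
    for ( size_t i = 0; i < is.size(); ++i )
        if ( is[i].lo <= v && v <= is[i].hi ) return true;
    return false;
}

struct Doc { int a, b; };

int main() {
    // RangeIn-style bounds: a in [1,10], b in {4} union {6},
    // i.e. a product of per-field interval sets over the (a,b) index.
    std::vector<Interval> aBounds( 1, iv( 1, 10 ) );
    std::vector<Interval> bBounds;
    bBounds.push_back( iv( 4, 4 ) );
    bBounds.push_back( iv( 6, 6 ) );

    Doc docs[] = { {0,4}, {1,5}, {4,3}, {5,4}, {7,5}, {4,4}, {9,6}, {11,1}, {11,4} };
    int hits = 0;
    for ( size_t i = 0; i < sizeof(docs)/sizeof(docs[0]); ++i )
        if ( containsAny( aBounds, docs[i].a ) && containsAny( bBounds, docs[i].b ) )
            ++hits;
    assert( hits == 3 ); // {5,4}, {4,4}, {9,6} -- exactly what the cursor must yield
    return 0;
}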
} } myall; } // namespace CursorTests diff -Nru mongodb-1.4.4/dbtests/dbtests.cpp mongodb-1.6.3/dbtests/dbtests.cpp --- mongodb-1.4.4/dbtests/dbtests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/dbtests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "dbtests.h" diff -Nru mongodb-1.4.4/dbtests/dbtests.h mongodb-1.6.3/dbtests/dbtests.h --- mongodb-1.4.4/dbtests/dbtests.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/dbtests.h 2010-09-24 10:02:42.000000000 -0700 @@ -21,4 +21,5 @@ using namespace mongo; using namespace mongo::regression; +using boost::shared_ptr; diff -Nru mongodb-1.4.4/dbtests/framework.cpp mongodb-1.6.3/dbtests/framework.cpp --- mongodb-1.4.4/dbtests/framework.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/framework.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,11 +16,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" +#include "../util/version.h" #include <boost/program_options.hpp> #undef assert -#define assert xassert +#define assert MONGO_assert #include "framework.h" #include "../util/file_allocator.h" @@ -53,7 +54,7 @@ ss << result; for ( list<string>::iterator i=_messages.begin(); i!=_messages.end(); i++ ){ - ss << "\t" << *i << "\n"; + ss << "\t" << *i << '\n'; } return ss.str(); @@ -76,7 +77,9 @@ Result * Result::cur = 0; - Result * Suite::run(){ + Result * Suite::run( const string& filter ){ + tlogLevel = -1; + log(1) << "\t about to setupTests" << endl; setupTests(); log(1) << "\t done setupTests" << endl; @@ -89,9 +92,13 @@ for ( list<TestCase*>::iterator i=_tests.begin(); i!=_tests.end(); i++ ){ TestCase * tc = *i; + if ( filter.size() && tc->getName().find( filter ) == string::npos ){ + log(1) << "\t skipping test: " << tc->getName() << " because doesn't match filter" << endl; + continue; + } r->_tests++; - + bool passes = false; log(1) << "\t going to run test: " << tc->getName() << endl; @@ -154,10 +161,11 @@ "directory will be overwritten if it already exists") ("debug", "run tests with verbose output") ("list,l", "list available test suites") + ("filter,f" , po::value<string>() , "string substring filter on test name" ) ("verbose,v", "verbose") ("seed", po::value<unsigned long long>(&seed), "random number seed") ; - + hidden_options.add_options() ("suites", po::value< vector<string> >(), "test suites to run") ; @@ -236,7 +244,13 @@ if (params.count("suites")) { suites = params["suites"].as< vector<string> >(); } - int ret = run(suites); + + string filter = ""; + if ( params.count( "filter" ) ){ + filter = params["filter"].as<string>(); + } + + int ret = run(suites,filter); #if !defined(_WIN32) && !defined(__sunos__) flock( lockFile, LOCK_UN ); @@ -247,7 +261,7 @@ return ret; } - int Suite::run( vector<string> suites ){ + int Suite::run( vector<string> suites , const string& filter ){ for ( unsigned int i = 0; i < suites.size(); i++ ) { if ( _suites->find( suites[i] ) == _suites->end() ) { cout << "invalid test [" << suites[i] << "], use --list to see valid names" << endl; @@ -269,7 +283,7 @@ assert( s ); log() << "going to run suite: " << name << endl; - results.push_back( s->run() ); + results.push_back( s->run( filter ) ); } Logstream::get().flush(); @@ -329,22 +343,6 @@ assert(0); } - string demangleName( const type_info& typeinfo ){ -#ifdef _WIN32 - return typeinfo.name(); -#else - int status; - - char * niceName = abi::__cxa_demangle(typeinfo.name(), 0, 0, &status); - if ( !
niceName ) - return typeinfo.name(); - - string s = niceName; - free(niceName); - return s; -#endif - } - MyAssertionException * MyAsserts::getBase(){ MyAssertionException * e = new MyAssertionException(); e->ss << _file << ":" << _line << " " << _aexp << " != " << _bexp << " "; diff -Nru mongodb-1.4.4/dbtests/framework.h mongodb-1.6.3/dbtests/framework.h --- mongodb-1.4.4/dbtests/framework.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/framework.h 2010-09-24 10:02:42.000000000 -0700 @@ -21,7 +21,7 @@ simple portable regression system */ -#include "../stdafx.h" +#include "../pch.h" #define ASSERT_EXCEPTION(a,b) \ try { \ @@ -34,6 +34,8 @@ #define ASSERT_EQUALS(a,b) (mongo::regression::MyAsserts( #a , #b , __FILE__ , __LINE__ ) ).ae( (a) , (b) ) +#define ASSERT_NOT_EQUALS(a,b) (mongo::regression::MyAsserts( #a , #b , __FILE__ , __LINE__ ) ).nae( (a) , (b) ) + #define ASSERT(x) (void)( (!(!(x))) ? mongo::regression::assert_pass() : mongo::regression::assert_fail( #x , __FILE__ , __LINE__ ) ) #define FAIL(x) mongo::regression::fail( #x , __FILE__ , __LINE__ ) @@ -45,8 +47,6 @@ class Result; - string demangleName( const type_info& typeinfo ); - class TestCase { public: virtual ~TestCase(){} @@ -112,9 +112,9 @@ _tests.push_back( new TestHolder1(a) ); } - Result * run(); + Result * run( const string& filter ); - static int run( vector<string> suites ); + static int run( vector<string> suites , const string& filter ); static int run( int argc , char ** argv , string default_dbpath ); @@ -166,6 +166,21 @@ throw e; } + template<class A, class B> + void nae( A a , B b ){ + _gotAssert(); + if ( a != b ) + return; + + printLocation(); + + MyAssertionException * e = getBase(); + e->ss << a << " == " << b << endl; + log() << e->ss.str() << endl; + throw e; + } + + + void printLocation(); private: diff -Nru mongodb-1.4.4/dbtests/histogram_test.cpp mongodb-1.6.3/dbtests/histogram_test.cpp --- mongodb-1.4.4/dbtests/histogram_test.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/dbtests/histogram_test.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,94 @@ +// histogramtests.cpp : histogram.{h,cpp} unit tests + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
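The demangleName() helper deleted from framework.cpp above did not disappear; the perftest hunk further down switches callers to mongo::demangleName, so the utility simply moved out of the test namespace. What it wraps is the Itanium-ABI demangler, which is why the deleted code kept an #ifdef _WIN32 fallback; a freestanding sketch of the same call (GCC/Clang only):

#include <cxxabi.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <typeinfo>

// Freestanding version of the demangling trick the removed helper used.
std::string demangled( const std::type_info& ti ) {
    int status = 0;
    char* nice = abi::__cxa_demangle( ti.name(), 0, 0, &status );
    if ( !nice ) return ti.name();      // fall back to the mangled name
    std::string s = nice;
    free( nice );                       // __cxa_demangle mallocs its result
    return s;
}

namespace demo { struct SomeTest {}; }

int main() {
    std::cout << demangled( typeid( demo::SomeTest ) ) << std::endl; // demo::SomeTest
    return 0;
}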
+ */ + +#include "../pch.h" + +#include "dbtests.h" +#include "../util/histogram.h" + +namespace mongo { + + using mongo::Histogram; + + class BoundariesInit{ + public: + void run(){ + Histogram::Options opts; + opts.numBuckets = 3; + opts.bucketSize = 10; + Histogram h( opts ); + + ASSERT_EQUALS( h.getBucketsNum(), 3u ); + + ASSERT_EQUALS( h.getCount( 0 ), 0u ); + ASSERT_EQUALS( h.getCount( 1 ), 0u ); + ASSERT_EQUALS( h.getCount( 2 ), 0u ); + + ASSERT_EQUALS( h.getBoundary( 0 ), 10u ); + ASSERT_EQUALS( h.getBoundary( 1 ), 20u ); + ASSERT_EQUALS( h.getBoundary( 2 ), numeric_limits::max() ); + } + }; + + class BoundariesExponential{ + public: + void run(){ + Histogram::Options opts; + opts.numBuckets = 4; + opts.bucketSize = 125; + opts.exponential = true; + Histogram h( opts ); + + ASSERT_EQUALS( h.getBoundary( 0 ), 125u ); + ASSERT_EQUALS( h.getBoundary( 1 ), 250u ); + ASSERT_EQUALS( h.getBoundary( 2 ), 500u ); + ASSERT_EQUALS( h.getBoundary( 3 ), numeric_limits::max() ); + } + }; + + class BoundariesFind{ + public: + void run(){ + Histogram::Options opts; + opts.numBuckets = 3; + opts.bucketSize = 10; + Histogram h( opts ); + + h.insert( 10 ); // end of first bucket + h.insert( 15 ); // second bucket + h.insert( 18 ); // second bucket + + ASSERT_EQUALS( h.getCount( 0 ), 1u ); + ASSERT_EQUALS( h.getCount( 1 ), 2u ); + ASSERT_EQUALS( h.getCount( 2 ), 0u ); + } + }; + + class HistogramSuite : public Suite { + public: + HistogramSuite() : Suite( "histogram" ){} + + void setupTests(){ + add< BoundariesInit >(); + add< BoundariesExponential >(); + add< BoundariesFind >(); + // TODO: complete the test suite + } + } histogramSuite; + +} // anonymous namespace diff -Nru mongodb-1.4.4/dbtests/jsobjtests.cpp mongodb-1.6.3/dbtests/jsobjtests.cpp --- mongodb-1.4.4/dbtests/jsobjtests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/jsobjtests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "../db/jsobj.h" #include "../db/jsobjmanipulator.h" #include "../db/json.h" @@ -25,13 +25,14 @@ #include "../db/extsort.h" #include "dbtests.h" +#include "../util/mongoutils/checksum.h" namespace JsobjTests { class BufBuilderBasic { public: void run() { BufBuilder b( 0 ); - b.append( "foo" ); + b.appendStr( "foo" ); ASSERT_EQUALS( 4, b.len() ); ASSERT( strcmp( "foo", b.buf() ) == 0 ); } @@ -223,6 +224,9 @@ BSONElementManipulator( o.firstElement() ).initTimestamp(); test = OpTime( o.firstElement().date() ); ASSERT( before < test && test < after ); + + OpTime x(123,456); + ASSERT_EQUALS( 528280977864LL , x.asLL() ); } }; @@ -266,7 +270,7 @@ bb << "b" << 2; BSONObj obj = bb.obj(); - ASSERT(obj.objsize() == 4+(1+2+4)+(1+2+4)+1); + ASSERT_EQUALS(obj.objsize() , 4+(1+2+4)+(1+2+4)+1); ASSERT(obj.valid()); ASSERT(obj.hasField("a")); ASSERT(obj.hasField("b")); @@ -300,7 +304,7 @@ ASSERT(tmp.hasField("a")); ASSERT(!tmp.hasField("b")); ASSERT(tmp == BSON("a" << 1)); - + //force a realloc BSONArrayBuilder arr; for (int i=0; i < 10000; i++){ @@ -311,7 +315,6 @@ ASSERT(obj.valid()); ASSERT(obj.hasField("a")); ASSERT(obj.hasField("b")); - ASSERT(obj.objdata() != tmp.objdata()); } } }; @@ -377,6 +380,67 @@ } }; + + class ToStringArray { + public: + void run() { + string spec = "{ a: [ \"a\", \"b\" ] }"; + ASSERT_EQUALS( spec, fromjson( spec ).toString() ); + } + }; + + class ToStringNumber { + public: + + void run(){ + BSONObjBuilder b; + b.append( "a" , (int)4 ); + b.append( "b" , (double)5 ); + b.append( "c" , (long long)6 ); + + b.append( "d" , 123.456789123456789123456789123456789 ); + b.append( "e" , 123456789.123456789123456789123456789 ); + b.append( "f" , 1234567891234567891234.56789123456789 ); + + b.append( "g" , -123.456 ); + + BSONObj x = b.obj(); + ASSERT_EQUALS( "4", x["a"].toString( false , true ) ); + ASSERT_EQUALS( "5.0", x["b"].toString( false , true ) ); + ASSERT_EQUALS( "6", x["c"].toString( false , true ) ); + + ASSERT_EQUALS( "123.4567891234568" , x["d"].toString( false , true ) ); + ASSERT_EQUALS( "123456789.1234568" , x["e"].toString( false , true ) ); + // ASSERT_EQUALS( "1.234567891234568e+21" , x["f"].toString( false , true ) ); // windows and *nix are different - TODO, work around for test or not bother? 
+ + ASSERT_EQUALS( "-123.456" , x["g"].toString( false , true ) ); + + } + }; + + class NullString { + public: + void run() { + BSONObjBuilder b; + b.append("a", "a\0b", 4); + b.append("b", string("a\0b", 3)); + b.appendAs(b.asTempObj()["a"], "c"); + BSONObj o = b.obj(); + + stringstream ss; + ss << 'a' << '\0' << 'b'; + + ASSERT_EQUALS(o["a"].valuestrsize(), 3+1); + ASSERT_EQUALS(o["a"].str(), ss.str()); + + ASSERT_EQUALS(o["b"].valuestrsize(), 3+1); + ASSERT_EQUALS(o["b"].str(), ss.str()); + + ASSERT_EQUALS(o["c"].valuestrsize(), 3+1); + ASSERT_EQUALS(o["c"].str(), ss.str()); + } + + }; namespace Validation { @@ -631,7 +695,7 @@ "\"seven\": [ \"a\", \"bb\", \"ccc\", 5 ]," "\"eight\": Dbref( \"rrr\", \"01234567890123456789aaaa\" )," "\"_id\": ObjectId( \"deadbeefdeadbeefdeadbeef\" )," - "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"02\" }," + "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"00\" }," "\"ten\": Date( 44 ), \"eleven\": /foooooo/i }" ); fuzz( b ); b.valid(); @@ -721,8 +785,50 @@ ASSERT( a.woCompare( c ) < 0 ); } }; + + class ToDate { + public: + void run(){ + OID oid; + + { + time_t before = ::time(0); + oid.init(); + time_t after = ::time(0); + ASSERT( oid.asTimeT() >= before ); + ASSERT( oid.asTimeT() <= after ); + } + + { + Date_t before = jsTime(); + sleepsecs(1); + oid.init(); + Date_t after = jsTime(); + ASSERT( oid.asDateT() >= before ); + ASSERT( oid.asDateT() <= after ); + } + } + }; + + class FromDate { + public: + void run(){ + OID min, oid, max; + Date_t now = jsTime(); + oid.init(); // slight chance this has different time. If its a problem, can change. + min.init(now); + max.init(now, true); + + ASSERT_EQUALS( (unsigned)oid.asTimeT() , now/1000 ); + ASSERT_EQUALS( (unsigned)min.asTimeT() , now/1000 ); + ASSERT_EQUALS( (unsigned)max.asTimeT() , now/1000 ); + ASSERT( BSON("" << min).woCompare( BSON("" << oid) ) < 0 ); + ASSERT( BSON("" << max).woCompare( BSON("" << oid) )> 0 ); + } + }; } // namespace OIDTests + namespace ValueStreamTests { class LabelBase { @@ -795,6 +901,19 @@ << "x" << "p" ); } }; + class LabelishOr : public LabelBase { + BSONObj expected() { + return BSON( "$or" << BSON_ARRAY( + BSON("a" << BSON( "$gt" << 1 << "$lte" << "x" )) + << BSON("b" << BSON( "$ne" << 1 << "$ne" << "f" << "$ne" << 22.3 )) + << BSON("x" << "p" ))); + } + BSONObj actual() { + return OR( BSON( "a" << GT << 1 << LTE << "x"), + BSON( "b" << NE << 1 << NE << "f" << NE << 22.3), + BSON( "x" << "p" ) ); + } + }; class Unallowed { public: @@ -1154,9 +1273,9 @@ auto_ptr i = sorter.iterator(); while( i->more() ) { BSONObjExternalSorter::Data d = i->next(); - cout << d.second.toString() << endl; + /*cout << d.second.toString() << endl; cout << d.first.objsize() << endl; - cout<<"SORTER next:" << d.first.toString() << endl; + cout<<"SORTER next:" << d.first.toString() << endl;*/ } } }; @@ -1420,6 +1539,166 @@ } }; + class EmbeddedNumbers { + public: + void run(){ + BSONObj x = BSON( "a" << BSON( "b" << 1 ) ); + BSONObj y = BSON( "a" << BSON( "b" << 1.0 ) ); + ASSERT_EQUALS( x , y ); + ASSERT_EQUALS( 0 , x.woCompare( y ) ); + } + }; + + class BuilderPartialItearte { + public: + void run(){ + { + BSONObjBuilder b; + b.append( "x" , 1 ); + b.append( "y" , 2 ); + + BSONObjIterator i = b.iterator(); + ASSERT( i.more() ); + ASSERT_EQUALS( 1 , i.next().numberInt() ); + ASSERT( i.more() ); + ASSERT_EQUALS( 2 , i.next().numberInt() ); + ASSERT( ! 
i.more() ); + + b.append( "z" , 3 ); + + i = b.iterator(); + ASSERT( i.more() ); + ASSERT_EQUALS( 1 , i.next().numberInt() ); + ASSERT( i.more() ); + ASSERT_EQUALS( 2 , i.next().numberInt() ); + ASSERT( i.more() ); + ASSERT_EQUALS( 3 , i.next().numberInt() ); + ASSERT( ! i.more() ); + + ASSERT_EQUALS( BSON( "x" << 1 << "y" << 2 << "z" << 3 ) , b.obj() ); + } + + } + }; + + class BSONFieldTests { + public: + void run(){ + { + BSONField<int> x("x"); + BSONObj o = BSON( x << 5 ); + ASSERT_EQUALS( BSON( "x" << 5 ) , o ); + } + + { + BSONField<int> x("x"); + BSONObj o = BSON( x.make(5) ); + ASSERT_EQUALS( BSON( "x" << 5 ) , o ); + } + + { + BSONField<int> x("x"); + BSONObj o = BSON( x(5) ); + ASSERT_EQUALS( BSON( "x" << 5 ) , o ); + + o = BSON( x.gt(5) ); + ASSERT_EQUALS( BSON( "x" << BSON( "$gt" << 5 ) ) , o ); + } + + } + }; + + class BSONForEachTest { + public: + void run(){ + BSONObj obj = BSON("a" << 1 << "a" << 2 << "a" << 3); + + int count = 0; + BSONForEach(e, obj){ + ASSERT_EQUALS( e.fieldName() , string("a") ); + count += e.Int(); + } + + ASSERT_EQUALS( count , 1+2+3 ); + } + }; + + class StringDataTest { + public: + void run(){ + StringData a( string( "aaa" ) ); + ASSERT_EQUALS( 3u , a.size() ); + + StringData b( string( "bbb" ).c_str() ); + ASSERT_EQUALS( 3u , b.size() ); + + StringData c( "ccc", StringData::LiteralTag() ); + ASSERT_EQUALS( 3u , c.size() ); + + // TODO update test when second parm takes StringData too + BSONObjBuilder builder; + builder.append( c, "value"); + ASSERT_EQUALS( builder.obj() , BSON( c.data() << "value" ) ); + + } + }; + + class CompareOps { + public: + void run(){ + + BSONObj a = BSON("a"<<1); + BSONObj b = BSON("a"<<1); + BSONObj c = BSON("a"<<2); + BSONObj d = BSON("a"<<3); + BSONObj e = BSON("a"<<4); + BSONObj f = BSON("a"<<4); + + ASSERT( ! ( a < b ) ); + ASSERT( a <= b ); + ASSERT( a < c ); + + ASSERT( f > d ); + ASSERT( f >= e ); + ASSERT( !
( f > e ) ); + } + }; + + class HashingTest { + public: + void run(){ + int N = 100000; + BSONObj x = BSON( "name" << "eliot was here" + << "x" << 5 + << "asdasdasdas" << "asldkasldjasldjasldjlasjdlasjdlasdasdasdasdasdasdasd" ); + + { + Timer t; + for ( int i=0; i(); add< BSONObjTests::AppendIntOrLL >(); add< BSONObjTests::AppendNumber >(); + add< BSONObjTests::ToStringArray >(); + add< BSONObjTests::ToStringNumber >(); + add< BSONObjTests::NullString >(); add< BSONObjTests::Validation::BadType >(); add< BSONObjTests::Validation::EooBeforeEnd >(); add< BSONObjTests::Validation::Undefined >(); @@ -1478,12 +1760,21 @@ add< OIDTests::initParse1 >(); add< OIDTests::append >(); add< OIDTests::increasing >(); + add< OIDTests::ToDate >(); + add< OIDTests::FromDate >(); add< ValueStreamTests::LabelBasic >(); add< ValueStreamTests::LabelShares >(); add< ValueStreamTests::LabelDouble >(); add< ValueStreamTests::LabelDoubleShares >(); add< ValueStreamTests::LabelSize >(); add< ValueStreamTests::LabelMulti >(); + add< ValueStreamTests::LabelishOr >(); + add< ValueStreamTests::Unallowed >(); + add< ValueStreamTests::ElementAppend >(); + add< SubObjectBuilder >(); + add< DateBuilder >(); + add< DateNowBuilder >(); + add< TimeTBuilder >(); add< ValueStreamTests::Unallowed >(); add< ValueStreamTests::ElementAppend >(); add< SubObjectBuilder >(); @@ -1510,6 +1801,13 @@ add< checkForStorageTests >(); add< InvalidIDFind >(); add< ElementSetTest >(); + add< EmbeddedNumbers >(); + add< BuilderPartialItearte >(); + add< BSONFieldTests >(); + add< BSONForEachTest >(); + add< StringDataTest >(); + add< CompareOps >(); + add< HashingTest >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/jsontests.cpp mongodb-1.6.3/dbtests/jsontests.cpp --- mongodb-1.4.4/dbtests/jsontests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/jsontests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . 
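The jsontests hunks below track a default-subtype change for binary data: 1.4 emitted user binaries as the deprecated byte-array subtype 02, while 1.6 writes BinDataGeneral, so strict JSON now carries "$type" : "00". The "$binary" payloads in those assertions are plain base64: 'abc' -> "YWJj", 'ab' -> "YWI=", 'a' -> "YQ==". A minimal encoder showing where the padding comes from:

#include <cstdio>
#include <string>

// Minimal base64 encoder; shows why 'a','ab','abc' pad to "YQ==","YWI=","YWJj".
std::string b64( const unsigned char* d, int len ) {
    static const char* tbl =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    std::string out;
    for ( int i = 0; i < len; i += 3 ) {
        int n = ( d[i] << 16 )
              | ( i + 1 < len ? d[i+1] << 8 : 0 )
              | ( i + 2 < len ? d[i+2] : 0 );
        out += tbl[ ( n >> 18 ) & 63 ];
        out += tbl[ ( n >> 12 ) & 63 ];
        out += i + 1 < len ? tbl[ ( n >> 6 ) & 63 ] : '=';  // pad a short final group
        out += i + 2 < len ? tbl[ n & 63 ] : '=';
    }
    return out;
}

int main() {
    printf( "%s %s %s\n",
            b64( (const unsigned char*)"a", 1 ).c_str(),     // YQ==
            b64( (const unsigned char*)"ab", 2 ).c_str(),    // YWI=
            b64( (const unsigned char*)"abc", 3 ).c_str() ); // YWJj
    return 0;
}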
*/ -#include "stdafx.h" +#include "pch.h" #include "../db/jsobj.h" #include "../db/json.h" @@ -248,18 +248,21 @@ z[ 1 ] = 'b'; z[ 2 ] = 'c'; BSONObjBuilder b; - b.appendBinData( "a", 3, ByteArray, z ); - ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YWJj\", \"$type\" : \"02\" } }", - b.done().jsonString( Strict ) ); + b.appendBinData( "a", 3, BinDataGeneral, z ); + + string o = b.done().jsonString( Strict ); + + ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YWJj\", \"$type\" : \"00\" } }", + o ); BSONObjBuilder c; - c.appendBinData( "a", 2, ByteArray, z ); - ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"02\" } }", + c.appendBinData( "a", 2, BinDataGeneral, z ); + ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"00\" } }", c.done().jsonString( Strict ) ); BSONObjBuilder d; - d.appendBinData( "a", 1, ByteArray, z ); - ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"02\" } }", + d.appendBinData( "a", 1, BinDataGeneral, z ); + ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"00\" } }", d.done().jsonString( Strict ) ); } }; @@ -333,7 +336,7 @@ ASSERT_EQUALS( "{ \"x\" : function(){ return 1; } }" , o.jsonString() ); } }; - + class TimestampTests { public: void run(){ @@ -344,7 +347,49 @@ } }; + class NullString { + public: + void run(){ + BSONObjBuilder b; + b.append( "x" , "a\0b" , 4 ); + BSONObj o = b.obj(); + ASSERT_EQUALS( "{ \"x\" : \"a\\u0000b\" }" , o.jsonString() ); + } + }; + + class AllTypes { + public: + void run(){ + OID oid; + oid.init(); + BSONObjBuilder b; + b.appendMinKey( "a" ); + b.append( "b" , 5.5 ); + b.append( "c" , "abc" ); + b.append( "e" , BSON( "x" << 1 ) ); + b.append( "f" , BSON_ARRAY( 1 << 2 << 3 ) ); + b.appendBinData( "g" , 5 , bdtCustom , (const char*)this ); + b.appendUndefined( "h" ); + b.append( "i" , oid ); + b.appendBool( "j" , 1 ); + b.appendDate( "k" , 123 ); + b.appendNull( "l" ); + b.appendRegex( "m" , "a" ); + b.appendDBRef( "n" , "foo" , oid ); + b.appendCode( "o" , "function(){}" ); + b.appendSymbol( "p" , "foo" ); + b.appendCodeWScope( "q" , "function(){}" , BSON("x" << 1 ) ); + b.append( "r" , (int)5 ); + b.appendTimestamp( "s" , 123123123123123LL ); + b.append( "t" , 12321312312LL ); + b.appendMaxKey( "u" ); + + BSONObj o = b.obj(); + cout << o.jsonString() << endl; + } + }; + } // namespace JsonStringTests namespace FromJsonTests { @@ -739,11 +784,11 @@ z[ 1 ] = 'b'; z[ 2 ] = 'c'; BSONObjBuilder b; - b.appendBinData( "a", 3, ByteArray, z ); + b.appendBinData( "a", 3, BinDataGeneral, z ); return b.obj(); } virtual string json() const { - return "{ \"a\" : { \"$binary\" : \"YWJj\", \"$type\" : \"02\" } }"; + return "{ \"a\" : { \"$binary\" : \"YWJj\", \"$type\" : \"00\" } }"; } }; @@ -753,11 +798,11 @@ z[ 0 ] = 'a'; z[ 1 ] = 'b'; BSONObjBuilder b; - b.appendBinData( "a", 2, ByteArray, z ); + b.appendBinData( "a", 2, BinDataGeneral, z ); return b.obj(); } virtual string json() const { - return "{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"02\" } }"; + return "{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"00\" } }"; } }; @@ -766,11 +811,11 @@ char z[ 1 ]; z[ 0 ] = 'a'; BSONObjBuilder b; - b.appendBinData( "a", 1, ByteArray, z ); + b.appendBinData( "a", 1, BinDataGeneral, z ); return b.obj(); } virtual string json() const { - return "{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"02\" } }"; + return "{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"00\" } }"; } }; @@ -784,11 +829,11 @@ 0x5D, 0xB7, 0xE3, 0x9E, 0xBB, 0xF3, 0xDF, 0xBF }; BSONObjBuilder b; - b.appendBinData( 
"a", 48, ByteArray, z ); + b.appendBinData( "a", 48, BinDataGeneral, z ); return b.obj(); } virtual string json() const { - return "{ \"a\" : { \"$binary\" : \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/\", \"$type\" : \"02\" } }"; + return "{ \"a\" : { \"$binary\" : \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/\", \"$type\" : \"00\" } }"; } }; @@ -1021,7 +1066,17 @@ return "{ \"time.valid\" : { $gt : new Date(1257829200000) , $lt : new Date( 1257829200100 ) } }"; } }; - + + class NullString : public Base { + virtual BSONObj bson() const { + BSONObjBuilder b; + b.append( "x" , "a\0b" , 4 ); + return b.obj(); + } + virtual string json() const { + return "{ \"x\" : \"a\\u0000b\" }"; + } + }; } // namespace FromJsonTests @@ -1059,7 +1114,9 @@ add< JsonStringTests::RegexManyOptions >(); add< JsonStringTests::CodeTests >(); add< JsonStringTests::TimestampTests >(); - + add< JsonStringTests::NullString >(); + add< JsonStringTests::AllTypes >(); + add< FromJsonTests::Empty >(); add< FromJsonTests::EmptyWithSpace >(); add< FromJsonTests::SingleString >(); @@ -1110,6 +1167,7 @@ add< FromJsonTests::EmbeddedDatesFormat1 >(); add< FromJsonTests::EmbeddedDatesFormat2 >(); add< FromJsonTests::EmbeddedDatesFormat3 >(); + add< FromJsonTests::NullString >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/jstests.cpp mongodb-1.6.3/dbtests/jstests.cpp --- mongodb-1.4.4/dbtests/jstests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/jstests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,10 +17,10 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/instance.h" -#include "../stdafx.h" +#include "../pch.h" #include "../scripting/engine.h" #include "dbtests.h" @@ -531,12 +531,17 @@ ASSERT( s->exec( "printjson( a ); b = {b:a.a}", "foo", false, true, false ) ); out = s->getObject( "b" ); ASSERT_EQUALS( mongo::NumberLong, out.firstElement().type() ); - ASSERT_EQUALS( val, out.firstElement().numberLong() ); + if( val != out.firstElement().numberLong() ) { + cout << val << endl; + cout << out.firstElement().numberLong() << endl; + cout << out.toString() << endl; + ASSERT_EQUALS( val, out.firstElement().numberLong() ); + } ASSERT( s->exec( "c = {c:a.a.toString()}", "foo", false, true, false ) ); out = s->getObject( "c" ); stringstream ss; - ss << val; + ss << "NumberLong(\"" << val << "\")"; ASSERT_EQUALS( ss.str(), out.firstElement().valuestr() ); ASSERT( s->exec( "d = {d:a.a.toNumber()}", "foo", false, true, false ) ); @@ -627,7 +632,7 @@ private: void check( const BSONObj &one, const BSONObj &two ) { if ( one.woCompare( two ) != 0 ) { - static string fail = string( "Assertion failure expected " ) + string( one ) + ", got " + string( two ); + static string fail = string( "Assertion failure expected " ) + one.toString() + ", got " + two.toString(); FAIL( fail.c_str() ); } } @@ -652,6 +657,37 @@ } static const char *ns() { return "unittest.jstests.longutf8string"; } }; + + class InvalidUTF8Check { + public: + void run(){ + if( !globalScriptEngine->utf8Ok() ) + return; + + auto_ptr s; + s.reset( globalScriptEngine->newScope() ); + + BSONObj b; + { + char crap[5]; + + crap[0] = (char) 128; + crap[1] = 17; + crap[2] = (char) 128; + crap[3] = 17; + crap[4] = 0; + + BSONObjBuilder bb; + bb.append( "x" , crap ); + b = bb.obj(); + } + + //cout << "ELIOT: " << b.jsonString() << endl; + s->setThis( &b ); + // its ok if this is handled by js, just can't create a c++ exception + s->invoke( "x=this.x.length;" , BSONObj() ); + 
} + }; class CodeTests { public: @@ -701,7 +737,8 @@ { BSONObj fromA = client.findOne( _a , BSONObj() ); - cout << "Froma : " << fromA << endl; + assert( fromA.valid() ); + //cout << "Froma : " << fromA << endl; BSONObjBuilder b; b.append( "b" , 18 ); b.appendDBRef( "c" , "dbref.a" , fromA["_id"].__oid() ); @@ -771,7 +808,7 @@ { BSONObjBuilder b; b.append( "a" , 7 ); - b.appendBinData( "b" , 12 , ByteArray , foo ); + b.appendBinData( "b" , 12 , BinDataGeneral , foo ); in = b.obj(); s->setObject( "x" , in ); } @@ -788,11 +825,11 @@ // check that BinData js class is utilized s->invokeSafe( "q = x.b.toString();", BSONObj() ); stringstream expected; - expected << "BinData( type: " << ByteArray << ", base64: \"" << base64 << "\" )"; + expected << "BinData(" << BinDataGeneral << ",\"" << base64 << "\")"; ASSERT_EQUALS( expected.str(), s->getString( "q" ) ); stringstream scriptBuilder; - scriptBuilder << "z = { c : new BinData( " << ByteArray << ", \"" << base64 << "\" ) };"; + scriptBuilder << "z = { c : new BinData( " << BinDataGeneral << ", \"" << base64 << "\" ) };"; string script = scriptBuilder.str(); s->invokeSafe( script.c_str(), BSONObj() ); out = s->getObject( "z" ); @@ -842,7 +879,34 @@ s->invoke( f , empty ); ASSERT_EQUALS( 11 , s->getNumber( "return" ) ); } - cout << "speed1: " << ( n / t.millis() ) << " ops/ms" << endl; + //cout << "speed1: " << ( n / t.millis() ) << " ops/ms" << endl; + } + }; + + class ScopeOut { + public: + void run(){ + auto_ptr s; + s.reset( globalScriptEngine->newScope() ); + + s->invokeSafe( "x = 5;" , BSONObj() ); + { + BSONObjBuilder b; + s->append( b , "z" , "x" ); + ASSERT_EQUALS( BSON( "z" << 5 ) , b.obj() ); + } + + s->invokeSafe( "x = function(){ return 17; }" , BSONObj() ); + BSONObj temp; + { + BSONObjBuilder b; + s->append( b , "z" , "x" ); + temp = b.obj(); + s->setThis( &temp ); + } + + s->invokeSafe( "foo = this.z();" , BSONObj() ); + ASSERT_EQUALS( 17 , s->getNumber( "foo" ) ); } }; @@ -857,7 +921,7 @@ add< ResetScope >(); add< FalseTests >(); add< SimpleFunctions >(); - + add< ObjectMapping >(); add< ObjectDecoding >(); add< JSOIDTests >(); @@ -867,10 +931,8 @@ add< SpecialDBTypes >(); add< TypeConservation >(); add< NumberLong >(); - + add< WeirdObjects >(); - add< Utf8Check >(); - add< LongUtf8String >(); add< CodeTests >(); add< DBRefTest >(); add< InformalDBRef >(); @@ -879,6 +941,12 @@ add< VarTests >(); add< Speed1 >(); + + add< InvalidUTF8Check >(); + add< Utf8Check >(); + add< LongUtf8String >(); + + add< ScopeOut >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/macrotests.cpp mongodb-1.6.3/dbtests/macrotests.cpp --- mongodb-1.4.4/dbtests/macrotests.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/dbtests/macrotests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,47 @@ +/* + * Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
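The NumberLong changes above, where toString() now renders NumberLong("...") instead of bare digits, exist because a JavaScript Number is an IEEE double: integers above 2^53 are not exactly representable, so round-tripping a 64-bit value through a plain Number can silently alter it. The failure mode in miniature:

#include <cassert>

int main() {
    long long big = 9007199254740993LL;       // 2^53 + 1
    double asJsNumber = (double)big;          // what a bare JS Number would hold
    long long back = (long long)asJsNumber;
    assert( back != big );                    // 2^53 + 1 rounds to 2^53
    assert( back == 9007199254740992LL );     // hence the string form in toString()
    return 0;
}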
+ */ + +#undef MONGO_EXPOSE_MACROS + +#include "../client/dbclient.h" + +#ifdef malloc +# error malloc defined 0 +#endif + +#ifdef assert +# error assert defined 1 +#endif + +#include "../client/parallel.h" //uses assert + +#ifdef assert +# error assert defined 2 +#endif + +#include "../client/redef_macros.h" + +#ifndef assert +# error assert not defined 3 +#endif + +#include "../client/undef_macros.h" + +#ifdef assert +# error assert defined 3 +#endif + + diff -Nru mongodb-1.4.4/dbtests/matchertests.cpp mongodb-1.6.3/dbtests/matchertests.cpp --- mongodb-1.4.4/dbtests/matchertests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/matchertests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/matcher.h" #include "../db/json.h" @@ -96,6 +96,14 @@ } }; + class MixedNumericEmbedded { + public: + void run(){ + Matcher m( BSON( "a" << BSON( "x" << 1 ) ) ); + ASSERT( m.matches( BSON( "a" << BSON( "x" << 1 ) ) ) ); + ASSERT( m.matches( BSON( "a" << BSON( "x" << 1.0 ) ) ) ); + } + }; class Size { public: @@ -121,6 +129,7 @@ add< MixedNumericGt >(); add< MixedNumericIN >(); add< Size >(); + add< MixedNumericEmbedded >(); } } dball; diff -Nru mongodb-1.4.4/dbtests/mockdbclient.h mongodb-1.6.3/dbtests/mockdbclient.h --- mongodb-1.4.4/dbtests/mockdbclient.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/mockdbclient.h 2010-09-24 10:02:42.000000000 -0700 @@ -20,16 +20,21 @@ #include "../client/dbclient.h" #include "../db/commands.h" +#include "../db/replpair.h" class MockDBClientConnection : public DBClientConnection { public: MockDBClientConnection() : connect_() {} virtual - BSONObj findOne(const string &ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { + BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { return one_; } virtual - bool connect(const string &serverHostname, string& errmsg) { + bool connect(const char * serverHostname, string& errmsg) { + return connect_; + } + virtual + bool connect(const HostAndPort& , string& errmsg) { return connect_; } virtual @@ -62,11 +67,12 @@ rp_( rp ), cc_( cc ) { } - virtual BSONObj findOne(const string &ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { + virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { + BSONObj c = query.obj.copy(); if ( cc_ ) cc_->beforeCommand(); SetGlobalReplPair s( rp_ ); BSONObjBuilder result; - result.append( "ok", Command::runAgainstRegistered( "admin.$cmd", query.obj, result ) ? 1.0 : 0.0 ); + result.append( "ok", Command::runAgainstRegistered( "admin.$cmd", c, result ) ? 1.0 : 0.0 ); if ( cc_ ) cc_->afterCommand(); return result.obj(); } diff -Nru mongodb-1.4.4/dbtests/namespacetests.cpp mongodb-1.6.3/dbtests/namespacetests.cpp --- mongodb-1.4.4/dbtests/namespacetests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/namespacetests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ */ // Where IndexDetails defined. 
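macrotests.cpp above is a compile-time test of header hygiene: with MONGO_EXPOSE_MACROS undefined, pulling in the client headers must not leak short macros like assert or malloc, while redef_macros.h re-exposes them and undef_macros.h takes them back. The guard pattern those headers rely on, in miniature (the MYLIB_* names here are hypothetical):

// mylib_redef_macros.h (sketch) -- re-expose short names only when asked
#if defined(MYLIB_EXPOSE_MACROS) && !defined(MYLIB_MACROS_PUSHED)
#define assert MYLIB_assert
#define dassert MYLIB_dassert
#define MYLIB_MACROS_PUSHED
#endif

// mylib_undef_macros.h (sketch) -- take them back so client code stays unpolluted
#ifdef MYLIB_MACROS_PUSHED
#undef assert
#undef dassert
#undef MYLIB_MACROS_PUSHED
#endif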
-#include "stdafx.h" +#include "pch.h" #include "../db/namespace.h" #include "../db/db.h" @@ -548,13 +548,13 @@ keys.clear(); id().getKeysFromObject( fromjson( "{a:1,b:null}" ), keys ); - cout << "YO : " << *(keys.begin()) << endl; + //cout << "YO : " << *(keys.begin()) << endl; checkSize(1, keys ); keys.clear(); id().getKeysFromObject( fromjson( "{a:1,b:[]}" ), keys ); checkSize(1, keys ); - cout << "YO : " << *(keys.begin()) << endl; + //cout << "YO : " << *(keys.begin()) << endl; ASSERT_EQUALS( NumberInt , keys.begin()->firstElement().type() ); keys.clear(); } @@ -591,7 +591,7 @@ ASSERT( userCreateNS( ns(), fromjson( spec() ), err, false ) ); } virtual string spec() const { - return "{\"capped\":true,\"size\":512}"; + return "{\"capped\":true,\"size\":512,\"$nExtents\":1}"; } int nRecords() const { int count = 0; @@ -675,6 +675,7 @@ void run() { create(); ASSERT_EQUALS( 2, nExtents() ); + BSONObj b = bigObj(); DiskLoc l[ 8 ]; @@ -699,13 +700,78 @@ } }; + /* test NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc loc) + */ + class TruncateCapped : public Base { + virtual string spec() const { + return "{\"capped\":true,\"size\":512,\"$nExtents\":2}"; + } + void pass(int p) { + create(); + ASSERT_EQUALS( 2, nExtents() ); + + BSONObj b = bigObj(); + + DiskLoc l[ 8 ]; + for ( int i = 0; i < 8; ++i ) { + l[ i ] = theDataFileMgr.insert( ns(), b.objdata(), b.objsize() ); + ASSERT( !l[ i ].isNull() ); + ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() ); + if ( i > 3 ) + ASSERT( l[ i ] == l[ i - 4 ] ); + } + + NamespaceDetails *nsd = nsdetails(ns()); + + DiskLoc last, first; + { + ReverseCappedCursor c(nsd); + last = c.currLoc(); + ASSERT( !last.isNull() ); + } + { + ForwardCappedCursor c(nsd); + first = c.currLoc(); + ASSERT( !first.isNull() ); + ASSERT( first != last ) ; + } + + DiskLoc d = l[6]; + long long n = nsd->nrecords; + nsd->cappedTruncateAfter(ns(), d, false); + ASSERT_EQUALS( nsd->nrecords , n-1 ); + + { + ForwardCappedCursor c(nsd); + ASSERT( first == c.currLoc() ); + } + { + ReverseCappedCursor c(nsd); + ASSERT( last != c.currLoc() ); // old last should be deleted + ASSERT( !last.isNull() ); + } + + // Too big + BSONObjBuilder bob; + bob.append( "a", string( 787, 'a' ) ); + BSONObj bigger = bob.done(); + ASSERT( theDataFileMgr.insert( ns(), bigger.objdata(), bigger.objsize() ).isNull() ); + ASSERT_EQUALS( 0, nRecords() ); + } + public: + void run() { +// log() << "******** NOT RUNNING TruncateCapped test yet ************" << endl; + pass(0); + } + }; + class Migrate : public Base { public: void run() { create(); - nsd()->deletedList[ 2 ] = nsd()->deletedList[ 0 ].drec()->nextDeleted.drec()->nextDeleted; - nsd()->deletedList[ 0 ].drec()->nextDeleted.drec()->nextDeleted = DiskLoc(); - nsd()->deletedList[ 1 ].Null(); + nsd()->deletedList[ 2 ] = nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted; + nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted = DiskLoc(); + nsd()->cappedLastDelRecLastExtent().Null(); NamespaceDetails *d = nsd(); zero( &d->capExtent ); zero( &d->capFirstNewRecord ); @@ -716,9 +782,9 @@ ASSERT( nsd()->capExtent.getOfs() != 0 ); ASSERT( !nsd()->capFirstNewRecord.isValid() ); int nDeleted = 0; - for ( DiskLoc i = nsd()->deletedList[ 0 ]; !i.isNull(); i = i.drec()->nextDeleted, ++nDeleted ); + for ( DiskLoc i = nsd()->cappedListOfAllDeletedRecords(); !i.isNull(); i = i.drec()->nextDeleted, ++nDeleted ); ASSERT_EQUALS( 10, nDeleted ); - ASSERT( nsd()->deletedList[ 1 ].isNull() ); + ASSERT( 
nsd()->cappedLastDelRecLastExtent().isNull() ); } private: static void zero( DiskLoc *d ) { @@ -741,7 +807,7 @@ // private: // virtual string spec() const { // // NOTE 256 added to size in _userCreateNS() - // long long big = MongoDataFile::maxSize() - MDFHeader::headerSize(); + // long long big = MongoDataFile::maxSize() - DataFileHeader::HeaderSize; // stringstream ss; // ss << "{\"capped\":true,\"size\":" << big << "}"; // return ss.str(); @@ -788,6 +854,7 @@ add< NamespaceDetailsTests::SingleAlloc >(); add< NamespaceDetailsTests::Realloc >(); add< NamespaceDetailsTests::TwoExtent >(); + add< NamespaceDetailsTests::TruncateCapped >(); add< NamespaceDetailsTests::Migrate >(); // add< NamespaceDetailsTests::BigCollection >(); add< NamespaceDetailsTests::Size >(); diff -Nru mongodb-1.4.4/dbtests/pairingtests.cpp mongodb-1.6.3/dbtests/pairingtests.cpp --- mongodb-1.4.4/dbtests/pairingtests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/pairingtests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,8 +17,8 @@ * along with this program. If not, see . */ -#include "stdafx.h" -#include "../db/replset.h" +#include "pch.h" +#include "../db/replpair.h" #include "dbtests.h" #include "mockdbclient.h" #include "../db/cmdline.h" @@ -195,7 +195,7 @@ TestableReplPair rp3( true, fromjson( "{ok:0}" ) ); rp3.arbitrate(); - ASSERT( rp3.state == ReplPair::State_Confused ); + ASSERT_EQUALS( rp3.state , ReplPair::State_Confused ); TestableReplPair rp4( true, fromjson( "{ok:1,you_are:1}" ) ); rp4.arbitrate(); diff -Nru mongodb-1.4.4/dbtests/pdfiletests.cpp mongodb-1.6.3/dbtests/pdfiletests.cpp --- mongodb-1.4.4/dbtests/pdfiletests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/pdfiletests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/pdfile.h" #include "../db/db.h" @@ -46,13 +46,13 @@ ASSERT( userCreateNS( ns(), fromjson( spec.str() ), err, false ) ); prepare(); int j = 0; - for ( auto_ptr< Cursor > i = theDataFileMgr.findAll( ns() ); + for ( boost::shared_ptr i = theDataFileMgr.findAll( ns() ); i->ok(); i->advance(), ++j ) ASSERT_EQUALS( j, i->current().firstElement().number() ); ASSERT_EQUALS( count(), j ); j = count() - 1; - for ( auto_ptr< Cursor > i = + for ( boost::shared_ptr i = findTableScan( ns(), fromjson( "{\"$natural\":-1}" ) ); i->ok(); i->advance(), --j ) ASSERT_EQUALS( j, i->current().firstElement().number() ); @@ -73,7 +73,7 @@ Extent *e = ext.ext(); int ofs; if ( e->lastRecord.isNull() ) - ofs = ext.getOfs() + ( e->extentData - (char *)e ); + ofs = ext.getOfs() + ( e->_extentData - (char *)e ); else ofs = e->lastRecord.getOfs() + e->lastRecord.rec()->lengthWithHeaders; DiskLoc dl( ext.a(), ofs ); @@ -296,7 +296,7 @@ b.appendTimestamp( "a" ); BSONObj o = b.done(); ASSERT( 0 == o.getField( "a" ).date() ); - theDataFileMgr.insert( ns(), o ); + theDataFileMgr.insertWithObjMod( ns(), o ); ASSERT( 0 != o.getField( "a" ).date() ); } }; diff -Nru mongodb-1.4.4/dbtests/perf/perftest.cpp mongodb-1.6.3/dbtests/perf/perftest.cpp --- mongodb-1.4.4/dbtests/perf/perftest.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/perf/perftest.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../../client/dbclient.h" #include "../../db/instance.h" @@ -42,7 +42,7 @@ // (ie allocation) work for another test. 
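The pdfiletests change above from auto_ptr to boost::shared_ptr for findAll()/findTableScan() cursors is more than style: auto_ptr transfers ownership on copy, so a cursor passed to a helper or stored in a container silently nulls the caller's pointer, while shared_ptr copies share ownership. The pitfall in isolation:

#include <cassert>
#include <memory>
#include <boost/shared_ptr.hpp>

struct Cursor { int pos; };

void inspectAuto( std::auto_ptr<Cursor> c ) { /* the copy takes ownership! */ }
void inspectShared( boost::shared_ptr<Cursor> c ) { /* the copy shares ownership */ }

int main() {
    std::auto_ptr<Cursor> a( new Cursor() );
    inspectAuto( a );           // copying transfers ownership...
    assert( a.get() == 0 );     // ...so the caller's pointer is now null

    boost::shared_ptr<Cursor> s( new Cursor() );
    inspectShared( s );         // refcount bumps and drops
    assert( s.get() != 0 );     // the caller still holds a live cursor
    return 0;
}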
template< class T > string testDb( T *t = 0 ) { - string name = mongo::regression::demangleName( typeid( T ) ); + string name = mongo::demangleName( typeid( T ) ); // Make filesystem safe. for( string::iterator i = name.begin(); i != name.end(); ++i ) if ( *i == ':' ) @@ -629,7 +629,7 @@ } void run() { for( int i = 0; i < 10000; ++i ) - QueryPlanSet s( ns_.c_str(), BSONObj(), BSONObj(), &hintElt_ ); + MultiPlanScanner s( ns_.c_str(), BSONObj(), BSONObj(), &hintElt_ ); } string ns_; auto_ptr< dblock > lk_; @@ -650,7 +650,7 @@ void run() { Client::Context ctx( ns_ ); for( int i = 0; i < 10000; ++i ) - QueryPlanSet s( ns_.c_str(), BSONObj(), BSON( "a" << 1 ) ); + MultiPlanScanner s( ns_.c_str(), BSONObj(), BSON( "a" << 1 ) ); } string ns_; auto_ptr< dblock > lk_; @@ -669,7 +669,7 @@ void run() { Client::Context ctx( ns_.c_str() ); for( int i = 0; i < 10000; ++i ) - QueryPlanSet s( ns_.c_str(), BSON( "a" << 1 ), BSONObj() ); + MultiPlanScanner s( ns_.c_str(), BSON( "a" << 1 ), BSONObj() ); } string ns_; auto_ptr< dblock > lk_; diff -Nru mongodb-1.4.4/dbtests/queryoptimizertests.cpp mongodb-1.6.3/dbtests/queryoptimizertests.cpp --- mongodb-1.4.4/dbtests/queryoptimizertests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/queryoptimizertests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,22 +17,24 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/queryoptimizer.h" - #include "../db/db.h" #include "../db/dbhelpers.h" #include "../db/instance.h" #include "../db/query.h" - #include "dbtests.h" namespace mongo { extern BSONObj id_obj; - auto_ptr< QueryResult > runQuery(Message& m, QueryMessage& q ){ + void runQuery(Message& m, QueryMessage& q, Message &response ){ CurOp op( &(cc()) ); op.ensureStarted(); - return runQuery( m , q , op ); + runQuery( m , q , op, response ); + } + void runQuery(Message& m, QueryMessage& q ){ + Message response; + runQuery( m, q, response ); } } // namespace mongo @@ -43,7 +45,7 @@ public: virtual ~Base() {} void run() { - FieldRangeSet s( "ns", query() ); + const FieldRangeSet s( "ns", query() ); checkElt( lower(), s.range( "a" ).min() ); checkElt( upper(), s.range( "a" ).max() ); ASSERT_EQUALS( lowerInclusive(), s.range( "a" ).minInclusive() ); @@ -57,9 +59,8 @@ virtual bool upperInclusive() { return true; } static void checkElt( BSONElement expected, BSONElement actual ) { if ( expected.woCompare( actual, false ) ) { - stringstream ss; - ss << "expected: " << expected << ", got: " << actual; - FAIL( ss.str() ); + log() << "expected: " << expected << ", got: " << actual; + ASSERT( false ); } } }; @@ -130,7 +131,7 @@ class TwoGt : public Gt { virtual BSONObj query() { return BSON( "a" << GT << 0 << GT << 1 ); } }; - + class EqGte : public Eq { virtual BSONObj query() { return BSON( "a" << 1 << "a" << GTE << 1 ); } }; @@ -146,10 +147,10 @@ struct RegexBase : Base { void run() { //need to only look at first interval FieldRangeSet s( "ns", query() ); - checkElt( lower(), s.range( "a" ).intervals()[0].lower_.bound_ ); - checkElt( upper(), s.range( "a" ).intervals()[0].upper_.bound_ ); - ASSERT_EQUALS( lowerInclusive(), s.range( "a" ).intervals()[0].lower_.inclusive_ ); - ASSERT_EQUALS( upperInclusive(), s.range( "a" ).intervals()[0].upper_.inclusive_ ); + checkElt( lower(), s.range( "a" ).intervals()[0]._lower._bound ); + checkElt( upper(), s.range( "a" ).intervals()[0]._upper._bound ); + ASSERT_EQUALS( lowerInclusive(), s.range( "a" ).intervals()[0]._lower._inclusive ); + ASSERT_EQUALS( upperInclusive(), 
s.range( "a" ).intervals()[0]._upper._inclusive ); } }; @@ -325,14 +326,374 @@ vector< FieldInterval >::const_iterator j = intervals.begin(); double expected[] = { 3, 5, 9 }; for( int i = 0; i < 3; ++i, ++j ) { - ASSERT_EQUALS( expected[ i ], j->lower_.bound_.number() ); - ASSERT( j->lower_.inclusive_ ); - ASSERT( j->lower_ == j->upper_ ); + ASSERT_EQUALS( expected[ i ], j->_lower._bound.number() ); + ASSERT( j->_lower._inclusive ); + ASSERT( j->_lower == j->_upper ); } ASSERT( j == intervals.end() ); } }; + class DiffBase { + public: + virtual ~DiffBase() {} + void run() { + FieldRangeSet frs( "", fromjson( obj().toString() ) ); + FieldRange ret = frs.range( "a" ); + ret -= frs.range( "b" ); + check( ret ); + } + protected: + void check( const FieldRange &fr ) { + vector< FieldInterval > fi = fr.intervals(); + ASSERT_EQUALS( len(), fi.size() ); + int i = 0; + for( vector< FieldInterval >::const_iterator j = fi.begin(); j != fi.end(); ++j ) { + ASSERT_EQUALS( nums()[ i ], j->_lower._bound.numberInt() ); + ASSERT_EQUALS( incs()[ i ], j->_lower._inclusive ); + ++i; + ASSERT_EQUALS( nums()[ i ], j->_upper._bound.numberInt() ); + ASSERT_EQUALS( incs()[ i ], j->_upper._inclusive ); + ++i; + } + } + virtual unsigned len() const = 0; + virtual const int *nums() const = 0; + virtual const bool *incs() const = 0; + virtual BSONObj obj() const = 0; + }; + + class TwoRangeBase : public DiffBase { + public: + TwoRangeBase( string obj, int low, int high, bool lowI, bool highI ) + : _obj( obj ) { + _n[ 0 ] = low; + _n[ 1 ] = high; + _b[ 0 ] = lowI; + _b[ 1 ] = highI; + } + private: + virtual unsigned len() const { return 1; } + virtual const int *nums() const { return _n; } + virtual const bool *incs() const { return _b; } + virtual BSONObj obj() const { return fromjson( _obj ); } + string _obj; + int _n[ 2 ]; + bool _b[ 2 ]; + }; + + struct Diff1 : public TwoRangeBase { + Diff1() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gt:3,$lt:4}}", 1, 2, false, false ) {} + }; + + struct Diff2 : public TwoRangeBase { + Diff2() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gt:2,$lt:4}}", 1, 2, false, false ) {} + }; + + struct Diff3 : public TwoRangeBase { + Diff3() : TwoRangeBase( "{a:{$gt:1,$lte:2},b:{$gt:2,$lt:4}}", 1, 2, false, true ) {} + }; + + struct Diff4 : public TwoRangeBase { + Diff4() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} + }; + + struct Diff5 : public TwoRangeBase { + Diff5() : TwoRangeBase( "{a:{$gt:1,$lte:2},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} + }; + + struct Diff6 : public TwoRangeBase { + Diff6() : TwoRangeBase( "{a:{$gt:1,$lte:3},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} + }; + + struct Diff7 : public TwoRangeBase { + Diff7() : TwoRangeBase( "{a:{$gt:1,$lte:3},b:{$gt:2,$lt:4}}", 1, 2, false, true) {} + }; + + struct Diff8 : public TwoRangeBase { + Diff8() : TwoRangeBase( "{a:{$gt:1,$lt:4},b:{$gt:2,$lt:4}}", 1, 2, false, true) {} + }; + + struct Diff9 : public TwoRangeBase { + Diff9() : TwoRangeBase( "{a:{$gt:1,$lt:4},b:{$gt:2,$lte:4}}", 1, 2, false, true) {} + }; + + struct Diff10 : public TwoRangeBase { + Diff10() : TwoRangeBase( "{a:{$gt:1,$lte:4},b:{$gt:2,$lte:4}}", 1, 2, false, true) {} + }; + + struct Diff11 : public TwoRangeBase { + Diff11() : TwoRangeBase( "{a:{$gt:1,$lte:4},b:{$gt:2,$lt:4}}", 1, 4, false, true) {} + }; + + struct Diff12 : public TwoRangeBase { + Diff12() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:2,$lt:4}}", 1, 5, false, false) {} + }; + + struct Diff13 : public TwoRangeBase { + Diff13() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:1,$lt:4}}", 
4, 5, true, false) {} + }; + + struct Diff14 : public TwoRangeBase { + Diff14() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:{$gt:1,$lt:4}}", 1, 5, true, false) {} + }; + + struct Diff15 : public TwoRangeBase { + Diff15() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gte:1,$lt:4}}", 4, 5, true, false) {} + }; + + struct Diff16 : public TwoRangeBase { + Diff16() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:{$gte:1,$lt:4}}", 4, 5, true, false) {} + }; + + struct Diff17 : public TwoRangeBase { + Diff17() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:0,$lt:4}}", 4, 5, true, false) {} + }; + + struct Diff18 : public TwoRangeBase { + Diff18() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:0,$lte:4}}", 4, 5, false, false) {} + }; + + struct Diff19 : public TwoRangeBase { + Diff19() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gte:0,$lte:1}}", 1, 5, false, true) {} + }; + + struct Diff20 : public TwoRangeBase { + Diff20() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:{$gte:0,$lte:1}}", 1, 5, false, true) {} + }; + + struct Diff21 : public TwoRangeBase { + Diff21() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gte:0,$lt:1}}", 1, 5, true, true) {} + }; + + struct Diff22 : public TwoRangeBase { + Diff22() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:{$gte:0,$lt:1}}", 1, 5, false, true) {} + }; + + struct Diff23 : public TwoRangeBase { + Diff23() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:{$gte:0,$lt:0.5}}", 1, 5, false, true) {} + }; + + struct Diff24 : public TwoRangeBase { + Diff24() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:0}", 1, 5, false, true) {} + }; + + struct Diff25 : public TwoRangeBase { + Diff25() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:0}", 1, 5, true, true) {} + }; + + struct Diff26 : public TwoRangeBase { + Diff26() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:1}", 1, 5, false, true) {} + }; + + struct Diff27 : public TwoRangeBase { + Diff27() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:1}", 1, 5, false, true) {} + }; + + struct Diff28 : public TwoRangeBase { + Diff28() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:3}", 1, 5, true, true) {} + }; + + struct Diff29 : public TwoRangeBase { + Diff29() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:5}", 1, 5, true, false) {} + }; + + struct Diff30 : public TwoRangeBase { + Diff30() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:5}", 1, 5, true, false) {} + }; + + struct Diff31 : public TwoRangeBase { + Diff31() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:6}", 1, 5, true, false) {} + }; + + struct Diff32 : public TwoRangeBase { + Diff32() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:6}", 1, 5, true, true) {} + }; + + class EmptyBase : public DiffBase { + public: + EmptyBase( string obj ) + : _obj( obj ) {} + private: + virtual unsigned len() const { return 0; } + virtual const int *nums() const { return 0; } + virtual const bool *incs() const { return 0; } + virtual BSONObj obj() const { return fromjson( _obj ); } + string _obj; + }; + + struct Diff33 : public EmptyBase { + Diff33() : EmptyBase( "{a:{$gte:1,$lte:5},b:{$gt:0,$lt:6}}" ) {} + }; + + struct Diff34 : public EmptyBase { + Diff34() : EmptyBase( "{a:{$gte:1,$lte:5},b:{$gte:1,$lt:6}}" ) {} + }; + + struct Diff35 : public EmptyBase { + Diff35() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gte:1,$lt:6}}" ) {} + }; + + struct Diff36 : public EmptyBase { + Diff36() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gt:1,$lt:6}}" ) {} + }; + + struct Diff37 : public TwoRangeBase { + Diff37() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:1,$lt:6}}", 1, 1, true, true ) {} + }; + + struct Diff38 : public EmptyBase { + Diff38() : EmptyBase( "{a:{$gt:1,$lt:5},b:{$gt:0,$lt:5}}" ) {} + }; + + struct Diff39 : public EmptyBase { + Diff39() : 
EmptyBase( "{a:{$gt:1,$lt:5},b:{$gt:0,$lte:5}}" ) {} + }; + + struct Diff40 : public EmptyBase { + Diff40() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gt:0,$lte:5}}" ) {} + }; + + struct Diff41 : public TwoRangeBase { + Diff41() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:0,$lt:5}}", 5, 5, true, true ) {} + }; + + struct Diff42 : public EmptyBase { + Diff42() : EmptyBase( "{a:{$gt:1,$lt:5},b:{$gt:1,$lt:5}}" ) {} + }; + + struct Diff43 : public EmptyBase { + Diff43() : EmptyBase( "{a:{$gt:1,$lt:5},b:{$gt:1,$lte:5}}" ) {} + }; + + struct Diff44 : public EmptyBase { + Diff44() : EmptyBase( "{a:{$gt:1,$lt:5},b:{$gte:1,$lt:5}}" ) {} + }; + + struct Diff45 : public EmptyBase { + Diff45() : EmptyBase( "{a:{$gt:1,$lt:5},b:{$gte:1,$lte:5}}" ) {} + }; + + struct Diff46 : public TwoRangeBase { + Diff46() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:{$gt:1,$lt:5}}", 5, 5, true, true ) {} + }; + + struct Diff47 : public EmptyBase { + Diff47() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gt:1,$lte:5}}" ) {} + }; + + struct Diff48 : public TwoRangeBase { + Diff48() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:{$gte:1,$lt:5}}", 5, 5, true, true ) {} + }; + + struct Diff49 : public EmptyBase { + Diff49() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gte:1,$lte:5}}" ) {} + }; + + struct Diff50 : public TwoRangeBase { + Diff50() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:{$gt:1,$lt:5}}", 1, 1, true, true ) {} + }; + + struct Diff51 : public TwoRangeBase { + Diff51() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:{$gt:1,$lte:5}}", 1, 1, true, true ) {} + }; + + struct Diff52 : public EmptyBase { + Diff52() : EmptyBase( "{a:{$gte:1,$lt:5},b:{$gte:1,$lt:5}}" ) {} + }; + + struct Diff53 : public EmptyBase { + Diff53() : EmptyBase( "{a:{$gte:1,$lt:5},b:{$gte:1,$lte:5}}" ) {} + }; + + struct Diff54 : public TwoRangeBase { + Diff54() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:1,$lt:5}}", 1, 5, true, true ) {} + }; + + struct Diff55 : public TwoRangeBase { + Diff55() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:1,$lte:5}}", 1, 1, true, true ) {} + }; + + struct Diff56 : public TwoRangeBase { + Diff56() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gte:1,$lt:5}}", 5, 5, true, true ) {} + }; + + struct Diff57 : public EmptyBase { + Diff57() : EmptyBase( "{a:{$gte:1,$lte:5},b:{$gte:1,$lte:5}}" ) {} + }; + + struct Diff58 : public TwoRangeBase { + Diff58() : TwoRangeBase( "{a:1,b:{$gt:1,$lt:5}}", 1, 1, true, true ) {} + }; + + struct Diff59 : public EmptyBase { + Diff59() : EmptyBase( "{a:1,b:{$gte:1,$lt:5}}" ) {} + }; + + struct Diff60 : public EmptyBase { + Diff60() : EmptyBase( "{a:2,b:{$gte:1,$lt:5}}" ) {} + }; + + struct Diff61 : public EmptyBase { + Diff61() : EmptyBase( "{a:5,b:{$gte:1,$lte:5}}" ) {} + }; + + struct Diff62 : public TwoRangeBase { + Diff62() : TwoRangeBase( "{a:5,b:{$gt:1,$lt:5}}", 5, 5, true, true ) {} + }; + + struct Diff63 : public EmptyBase { + Diff63() : EmptyBase( "{a:5,b:5}" ) {} + }; + + class DiffMulti1 : public DiffBase { + public: + void run() { + FieldRangeSet frs( "", fromjson( "{a:{$gt:1,$lt:9},b:{$gt:0,$lt:2},c:3,d:{$gt:4,$lt:5},e:{$gt:7,$lt:10}}" ) ); + FieldRange ret = frs.range( "a" ); + FieldRange other = frs.range( "b" ); + other |= frs.range( "c" ); + other |= frs.range( "d" ); + other |= frs.range( "e" ); + ret -= other; + check( ret ); + } + protected: + virtual unsigned len() const { return 1; } + virtual const int *nums() const { static int n[] = { 2, 7 }; return n; } + virtual const bool *incs() const { static bool b[] = { true, true }; return b; } + virtual BSONObj obj() const { return BSONObj(); } + }; + + class DiffMulti2 : public 
DiffBase { + public: + void run() { + FieldRangeSet frs( "", fromjson( "{a:{$gt:1,$lt:9},b:{$gt:0,$lt:2},c:3,d:{$gt:4,$lt:5},e:{$gt:7,$lt:10}}" ) ); + FieldRange mask = frs.range( "a" ); + FieldRange ret = frs.range( "b" ); + ret |= frs.range( "c" ); + ret |= frs.range( "d" ); + ret |= frs.range( "e" ); + ret -= mask; + check( ret ); + } + protected: + virtual unsigned len() const { return 2; } + virtual const int *nums() const { static int n[] = { 0, 1, 9, 10 }; return n; } + virtual const bool *incs() const { static bool b[] = { false, true, true, false }; return b; } + virtual BSONObj obj() const { return BSONObj(); } + }; + + class SetIntersect { + public: + void run() { + FieldRangeSet frs1( "", fromjson( "{b:{$in:[5,6]},c:7,d:{$in:[8,9]}}" ) ); + FieldRangeSet frs2( "", fromjson( "{a:1,b:5,c:{$in:[7,8]},d:{$in:[8,9]},e:10}" ) ); + frs1 &= frs2; + ASSERT_EQUALS( fromjson( "{a:1,b:5,c:7,d:{$gte:8,$lte:9},e:10}" ), frs1.simplifiedQuery( BSONObj() ) ); + } + }; + } // namespace FieldRangeTests namespace QueryPlanTests { @@ -369,12 +730,10 @@ return nsd()->idxNo( *index(key) ); } BSONObj startKey( const QueryPlan &p ) const { - BoundList bl = p.indexBounds(); - return bl[ 0 ].first.getOwned(); + return p.frv()->startKey(); } BSONObj endKey( const QueryPlan &p ) const { - BoundList bl = p.indexBounds(); - return bl[ bl.size() - 1 ].second.getOwned(); + return p.frv()->endKey(); } private: dblock lk_; @@ -393,7 +752,7 @@ class NoIndex : public Base { public: void run() { - QueryPlan p( nsd(), -1, FBS( BSONObj() ), BSONObj() ); + QueryPlan p( nsd(), -1, FBS( BSONObj() ), BSONObj(), BSONObj() ); ASSERT( !p.optimal() ); ASSERT( !p.scanAndOrderRequired() ); ASSERT( !p.exactKeyMatch() ); @@ -410,13 +769,13 @@ b2.appendMaxKey( "" ); BSONObj end = b2.obj(); - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 << "b" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << 1 ) ); ASSERT( !p2.scanAndOrderRequired() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSON( "b" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "b" << 1 ) ); ASSERT( p3.scanAndOrderRequired() ); ASSERT( !startKey( p3 ).woCompare( start ) ); ASSERT( !endKey( p3 ).woCompare( end ) ); @@ -426,7 +785,7 @@ class MoreIndexThanNeeded : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p.scanAndOrderRequired() ); } }; @@ -434,13 +793,13 @@ class IndexSigns : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << -1 ) , FBS( BSONObj() ), BSON( "a" << 1 << "b" << -1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << -1 ) , FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); ASSERT( !p.scanAndOrderRequired() ); ASSERT_EQUALS( 1, p.direction() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 << "b" << -1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 
) ); ASSERT( p2.scanAndOrderRequired() ); ASSERT_EQUALS( 0, p2.direction() ); - QueryPlan p3( nsd(), indexno( id_obj ), FBS( BSONObj() ), BSON( "_id" << 1 ) ); + QueryPlan p3( nsd(), indexno( id_obj ), FBS( BSONObj() ), BSONObj(), BSON( "_id" << 1 ) ); ASSERT( !p3.scanAndOrderRequired() ); ASSERT_EQUALS( 1, p3.direction() ); } @@ -457,15 +816,15 @@ b2.appendMaxKey( "" ); b2.appendMinKey( "" ); BSONObj end = b2.obj(); - QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ),FBS( BSONObj() ), BSON( "a" << 1 << "b" << -1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ),FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); ASSERT( !p.scanAndOrderRequired() ); ASSERT_EQUALS( -1, p.direction() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSON( "a" << -1 << "b" << -1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); ASSERT( !p2.scanAndOrderRequired() ); ASSERT_EQUALS( -1, p2.direction() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << -1 ), FBS( BSONObj() ), BSON( "a" << -1 << "b" << -1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << -1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); ASSERT( p3.scanAndOrderRequired() ); ASSERT_EQUALS( 0, p3.direction() ); } @@ -482,11 +841,11 @@ b2.append( "", 3 ); b2.appendMaxKey( "" ); BSONObj end = b2.obj(); - QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSONObj() ); + QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); ASSERT( !p.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSONObj() ); + QueryPlan p2( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); ASSERT( !p2.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); @@ -496,11 +855,11 @@ class EqualWithOrder : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 4 ) ), BSON( "b" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT( !p.scanAndOrderRequired() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "a" << 1 << "c" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); ASSERT( !p2.scanAndOrderRequired() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "a" << 1 << "c" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); ASSERT( p3.scanAndOrderRequired() ); } }; @@ -508,23 +867,23 @@ class Optimal : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( p.optimal() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); 
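
Aside on the FieldRangeTests::Diff1-Diff63 cases added earlier in this file: they pin down the semantics of FieldRange::operator-= . Subtracting range b from range a only clips a's edges -- a subtrahend that sits strictly inside a, which would split it into two intervals, leaves a untouched (that is what Diff11 asserts) -- and endpoint inclusiveness flips where the clipped range now stops. Below is a stand-alone C++ sketch of that interval arithmetic under that reading; Endpoint, Interval, ep and subtract are illustrative names, not the FieldBound/FieldInterval implementation. The hunk resumes after the sketch.

#include <cstdio>
#include <vector>

struct Endpoint { double v; bool inclusive; };
struct Interval { Endpoint lower, upper; };

static Endpoint ep( double v, bool inclusive ) {
    Endpoint e = { v, inclusive };
    return e;
}
// lower bound x starts at or before lower bound y
static bool startsAtOrBefore( Endpoint x, Endpoint y ) {
    return x.v < y.v || ( x.v == y.v && ( x.inclusive || !y.inclusive ) );
}
// upper bound x ends at or after upper bound y
static bool endsAtOrAfter( Endpoint x, Endpoint y ) {
    return x.v > y.v || ( x.v == y.v && ( x.inclusive || !y.inclusive ) );
}
// larger of two lower bounds; at equal values the exclusive bound is tighter
static Endpoint maxLower( Endpoint x, Endpoint y ) {
    if ( x.v != y.v ) return x.v > y.v ? x : y;
    return x.inclusive ? y : x;
}
// smaller of two upper bounds; at equal values the exclusive bound is tighter
static Endpoint minUpper( Endpoint x, Endpoint y ) {
    if ( x.v != y.v ) return x.v < y.v ? x : y;
    return x.inclusive ? y : x;
}
static bool nonEmpty( const Interval &i ) {
    return i.lower.v < i.upper.v ||
        ( i.lower.v == i.upper.v && i.lower.inclusive && i.upper.inclusive );
}
// a -= b for single intervals, matching the Diff assertions: b clips a's
// edges; a subtrahend strictly interior to a leaves a untouched.
static std::vector< Interval > subtract( Interval a, const Interval &b ) {
    std::vector< Interval > out;
    bool coversLower = startsAtOrBefore( b.lower, a.lower );
    bool coversUpper = endsAtOrAfter( b.upper, a.upper );
    if ( coversLower && coversUpper ) return out;   // b swallows a entirely
    if ( coversLower )   // clip a's lower edge to just above b's upper bound
        a.lower = maxLower( a.lower, ep( b.upper.v, !b.upper.inclusive ) );
    if ( coversUpper )   // clip a's upper edge to just below b's lower bound
        a.upper = minUpper( a.upper, ep( b.lower.v, !b.lower.inclusive ) );
    if ( nonEmpty( a ) ) out.push_back( a );
    return out;
}

static void show( const std::vector< Interval > &r ) {
    for ( size_t i = 0; i < r.size(); ++i )
        printf( "%c%g,%g%c\n", r[ i ].lower.inclusive ? '[' : '(',
                r[ i ].lower.v, r[ i ].upper.v, r[ i ].upper.inclusive ? ']' : ')' );
}

int main() {
    Interval b = { ep( 2, false ), ep( 4, false ) };    // b in (2,4)
    Interval a8 = { ep( 1, false ), ep( 4, false ) };   // Diff8: a in (1,4)
    show( subtract( a8, b ) );                          // prints (1,2]
    Interval a11 = { ep( 1, false ), ep( 4, true ) };   // Diff11: a in (1,4]
    show( subtract( a11, b ) );                         // prints (1,4] -- untouched
    return 0;
}
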
ASSERT( p2.optimal() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "a" << 1 ) ); ASSERT( p3.optimal() ); - QueryPlan p4( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p4( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "a" << 1 ) ); ASSERT( !p4.optimal() ); - QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "b" << 1 ) ); + QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "b" << 1 ) ); ASSERT( p5.optimal() ); - QueryPlan p6( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ) ); + QueryPlan p6( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "b" << 1 ) ); ASSERT( !p6.optimal() ); - QueryPlan p7( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p7( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << 1 ) ), BSON( "a" << 1 << "b" << 1 ), BSON( "a" << 1 ) ); ASSERT( p7.optimal() ); - QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); ASSERT( p8.optimal() ); - QueryPlan p9( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p9( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); ASSERT( p9.optimal() ); } }; @@ -532,13 +891,13 @@ class MoreOptimal : public Base { public: void run() { - QueryPlan p10( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 ) ), BSONObj() ); + QueryPlan p10( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSONObj() ); ASSERT( p10.optimal() ); - QueryPlan p11( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSONObj() ); + QueryPlan p11( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSONObj() ); ASSERT( p11.optimal() ); - QueryPlan p12( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSONObj() ); + QueryPlan p12( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSONObj() ); ASSERT( p12.optimal() ); - QueryPlan p13( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p13( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSON( "a" << 1 ) ); ASSERT( p13.optimal() ); } }; @@ -546,23 +905,23 @@ class KeyMatch : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p.exactKeyMatch() ); - QueryPlan p2( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( 
BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p2.exactKeyMatch() ); - QueryPlan p3( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSON( "b" << "z" ) ), BSON( "a" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSON( "b" << "z" ) ), BSON( "b" << "z" ), BSON( "a" << 1 ) ); ASSERT( !p3.exactKeyMatch() ); - QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSON( "a" << 1 ) ); + QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSON( "a" << 1 ) ); ASSERT( !p4.exactKeyMatch() ); - QueryPlan p5( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSONObj() ); + QueryPlan p5( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSONObj() ); ASSERT( !p5.exactKeyMatch() ); - QueryPlan p6( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << LT << "y" << "b" << GT << "z" ) ), BSONObj() ); + QueryPlan p6( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << LT << "y" << "b" << GT << "z" ) ), BSON( "c" << LT << "y" << "b" << GT << "z" ), BSONObj() ); ASSERT( !p6.exactKeyMatch() ); - QueryPlan p7( nsd(), INDEXNO( "b" << 1 ), FBS( BSONObj() ), BSON( "a" << 1 ) ); + QueryPlan p7( nsd(), INDEXNO( "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p7.exactKeyMatch() ); - QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << "y" << "a" << "z" ) ), BSONObj() ); + QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << "y" << "a" << "z" ) ), BSON( "b" << "y" << "a" << "z" ), BSONObj() ); ASSERT( p8.exactKeyMatch() ); - QueryPlan p9( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "z" ) ), BSON( "a" << 1 ) ); + QueryPlan p9( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "z" ) ), BSON( "a" << "z" ), BSON( "a" << 1 ) ); ASSERT( p9.exactKeyMatch() ); } }; @@ -570,7 +929,7 @@ class MoreKeyMatch : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "r" << "b" << NE << "q" ) ), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "r" << "b" << NE << "q" ) ), BSON( "a" << "r" << "b" << NE << "q" ), BSON( "a" << 1 ) ); ASSERT( !p.exactKeyMatch() ); } }; @@ -578,17 +937,18 @@ class ExactKeyQueryTypes : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "b" ) ), BSONObj() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "b" ) ), BSON( "a" << "b" ), BSONObj() ); ASSERT( p.exactKeyMatch() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << 4 ) ), BSONObj() ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSONObj() ); ASSERT( !p2.exactKeyMatch() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << BSON( "c" << "d" ) ) ), BSONObj() ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << BSON( "c" << "d" ) ) ), BSON( "a" << BSON( "c" << "d" ) ), BSONObj() ); ASSERT( !p3.exactKeyMatch() ); BSONObjBuilder b; b.appendRegex( "a", "^ddd" ); - QueryPlan p4( nsd(), INDEXNO( "a" << 1 ), FBS( b.obj() ), BSONObj() ); + BSONObj q = b.obj(); + QueryPlan p4( nsd(), INDEXNO( "a" << 1 ), FBS( q ), q, BSONObj() ); ASSERT( !p4.exactKeyMatch() ); - QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << "z" << "b" << 4 ) ), BSONObj() ); + QueryPlan p5( 
nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << "z" << "b" << 4 ) ), BSON( "a" << "z" << "b" << 4 ), BSONObj() ); ASSERT( !p5.exactKeyMatch() ); } }; @@ -596,17 +956,17 @@ class Unhelpful : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSONObj() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSONObj() ); ASSERT( !p.range( "a" ).nontrivial() ); ASSERT( p.unhelpful() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSON( "a" << 1 ) ); ASSERT( !p2.scanAndOrderRequired() ); ASSERT( !p2.range( "a" ).nontrivial() ); ASSERT( !p2.unhelpful() ); - QueryPlan p3( nsd(), INDEXNO( "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSONObj() ); + QueryPlan p3( nsd(), INDEXNO( "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSONObj() ); ASSERT( p3.range( "b" ).nontrivial() ); ASSERT( !p3.unhelpful() ); - QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "c" << 1 ), FBS( BSON( "c" << 1 << "d" << 1 ) ), BSONObj() ); + QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "c" << 1 ), FBS( BSON( "c" << 1 << "d" << 1 ) ), BSON( "c" << 1 << "d" << 1 ), BSONObj() ); ASSERT( !p4.range( "b" ).nontrivial() ); ASSERT( p4.unhelpful() ); } @@ -621,7 +981,7 @@ string err; userCreateNS( ns(), BSONObj(), err, false ); } - ~Base() { + virtual ~Base() { if ( !nsd() ) return; NamespaceDetailsTransient::_get( ns() ).clearQueryCache(); @@ -632,10 +992,10 @@ // see query.h for the protocol we are using here. BufBuilder b; int opts = queryOptions; - b.append(opts); - b.append(ns.c_str()); - b.append(nToSkip); - b.append(nToReturn); + b.appendNum(opts); + b.appendStr(ns); + b.appendNum(nToSkip); + b.appendNum(nToReturn); query.appendSelfToBufBuilder(b); if ( fieldsToReturn ) fieldsToReturn->appendSelfToBufBuilder(b); @@ -652,7 +1012,8 @@ class NoIndexes : public Base { public: void run() { - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -662,7 +1023,8 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "b_2" ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSONObj() ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSONObj() ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -672,7 +1034,8 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); } }; @@ -682,7 +1045,8 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); - QueryPlanSet s( ns(), BSONObj(), BSONObj() ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSONObj() ) ); + QueryPlanSet s( ns(), frs, BSONObj(), BSONObj() ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -694,7 +1058,8 @@ 
Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); BSONObj b = BSON( "hint" << BSON( "a" << 1 ) ); BSONElement e = b.firstElement(); - QueryPlanSet s( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -706,7 +1071,8 @@ Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); BSONObj b = BSON( "hint" << "a_1" ); BSONElement e = b.firstElement(); - QueryPlanSet s( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -718,7 +1084,8 @@ Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); BSONObj b = BSON( "hint" << BSON( "$natural" << 1 ) ); BSONElement e = b.firstElement(); - QueryPlanSet s( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -728,7 +1095,8 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "b_2" ); - QueryPlanSet s( ns(), BSON( "a" << 1 ), BSON( "$natural" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "$natural" << 1 ) ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -738,7 +1106,8 @@ void run() { BSONObj b = BSON( "hint" << "a_1" ); BSONElement e = b.firstElement(); - ASSERT_EXCEPTION( QueryPlanSet s( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ), &e ), + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); + ASSERT_EXCEPTION( QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ), AssertionException ); } }; @@ -753,11 +1122,11 @@ BSONObj one = BSON( "a" << 1 ); BSONObj fourA = BSON( "a" << 4 ); BSONObj fourB = BSON( "a" << 4 ); - theDataFileMgr.insert( ns(), one ); + theDataFileMgr.insertWithObjMod( ns(), one ); ASSERT_EQUALS( 0, runCount( ns(), BSON( "query" << BSON( "a" << 4 ) ), err ) ); - theDataFileMgr.insert( ns(), fourA ); + theDataFileMgr.insertWithObjMod( ns(), fourA ); ASSERT_EQUALS( 1, runCount( ns(), BSON( "query" << BSON( "a" << 4 ) ), err ) ); - theDataFileMgr.insert( ns(), fourB ); + theDataFileMgr.insertWithObjMod( ns(), fourB ); ASSERT_EQUALS( 2, runCount( ns(), BSON( "query" << BSON( "a" << 4 ) ), err ) ); ASSERT_EQUALS( 3, runCount( ns(), BSON( "query" << BSONObj() ), err ) ); ASSERT_EQUALS( 3, runCount( ns(), BSON( "query" << BSON( "a" << GT << 0 ) ), err ) ); @@ -770,15 +1139,20 @@ class QueryMissingNs : public Base { public: + QueryMissingNs() { log() << "querymissingns starts" << endl; } + ~QueryMissingNs() { + log() << "end QueryMissingNs" << endl; + } void run() { Message m; assembleRequest( "unittests.missingNS", BSONObj(), 0, 0, 0, 0, m ); - stringstream ss; - DbMessage d(m); QueryMessage q(d); - ASSERT_EQUALS( 0, runQuery( m, q)->nReturned ); + Message ret; + runQuery( m, q, ret ); + ASSERT_EQUALS( 0, ((QueryResult*)ret.header())->nReturned ); } + }; class UnhelpfulIndex : public Base { @@ -786,7 +1160,8 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); - QueryPlanSet s( ns(), 
BSON( "a" << 1 << "c" << 2 ), BSONObj() ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 << "c" << 2 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 1 << "c" << 2 ), BSONObj() ); ASSERT_EQUALS( 2, s.nPlans() ); } }; @@ -796,21 +1171,22 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); bool threw = false; auto_ptr< TestOp > t( new TestOp( true, threw ) ); boost::shared_ptr< TestOp > done = s.runOp( *t ); ASSERT( threw ); ASSERT( done->complete() ); - ASSERT( done->exceptionMessage().empty() ); + ASSERT( done->exception().empty() ); ASSERT( !done->error() ); } private: class TestOp : public QueryOp { public: TestOp( bool iThrow, bool &threw ) : iThrow_( iThrow ), threw_( threw ), i_(), youThrow_( false ) {} - virtual void init() {} + virtual void _init() {} virtual void next() { if ( iThrow_ ) threw_ = true; @@ -818,7 +1194,7 @@ if ( ++i_ > 10 ) setComplete(); } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { QueryOp *op = new TestOp( youThrow_, threw_ ); youThrow_ = !youThrow_; return op; @@ -837,22 +1213,23 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); auto_ptr< TestOp > t( new TestOp() ); boost::shared_ptr< TestOp > done = s.runOp( *t ); ASSERT( !done->complete() ); - ASSERT_EQUALS( "throw", done->exceptionMessage() ); + ASSERT_EQUALS( "throw", done->exception().msg ); ASSERT( done->error() ); } private: class TestOp : public QueryOp { public: - virtual void init() {} + virtual void _init() {} virtual void next() { massert( 10409 , "throw", false ); } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } @@ -883,19 +1260,22 @@ } nPlans( 3 ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); NoRecordTestOp original; s.runOp( original ); nPlans( 3 ); BSONObj hint = fromjson( "{hint:{$natural:1}}" ); BSONElement hintElt = hint.firstElement(); - QueryPlanSet s2( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ), &hintElt ); + auto_ptr< FieldRangeSet > frs2( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s2( ns(), frs2, BSON( "a" << 4 ), BSON( "b" << 1 ), &hintElt ); TestOp newOriginal; s2.runOp( newOriginal ); nPlans( 3 ); - QueryPlanSet s3( ns(), BSON( "a" << 4 ), BSON( "b" << 1 << "c" << 1 ) ); + auto_ptr< FieldRangeSet > frs3( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s3( ns(), frs3, BSON( "a" << 4 ), BSON( "b" << 1 << "c" << 1 ) ); TestOp newerOriginal; s3.runOp( newerOriginal ); nPlans( 3 ); @@ -905,28 +1285,30 @@ } private: void nPlans( int n ) { - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet 
s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( n, s.nPlans() ); } void runQuery() { - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); TestOp original; s.runOp( original ); } class TestOp : public QueryOp { public: - virtual void init() {} + virtual void _init() {} virtual void next() { setComplete(); } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } }; class NoRecordTestOp : public TestOp { virtual bool mayRecordPlan() const { return false; } - virtual QueryOp *clone() const { return new NoRecordTestOp(); } + virtual QueryOp *_createChild() const { return new NoRecordTestOp(); } }; }; @@ -935,26 +1317,28 @@ void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); - QueryPlanSet s( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ScanOnlyTestOp op; s.runOp( op ); ASSERT( fromjson( "{$natural:1}" ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( s.fbs().pattern( BSON( "b" << 1 ) ) ) ) == 0 ); ASSERT_EQUALS( 1, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( s.fbs().pattern( BSON( "b" << 1 ) ) ) ); - QueryPlanSet s2( ns(), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frs2( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); + QueryPlanSet s2( ns(), frs2, BSON( "a" << 4 ), BSON( "b" << 1 ) ); TestOp op2; ASSERT( s2.runOp( op2 )->complete() ); } private: class TestOp : public QueryOp { public: - virtual void init() {} + virtual void _init() {} virtual void next() { if ( qp().indexKey().firstElement().fieldName() == string( "$natural" ) ) massert( 10410 , "throw", false ); setComplete(); } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } @@ -965,7 +1349,7 @@ setComplete(); massert( 10411 , "throw", false ); } - virtual QueryOp *clone() const { + virtual QueryOp *_createChild() const { return new ScanOnlyTestOp(); } }; @@ -975,7 +1359,7 @@ public: void run() { BSONObj one = BSON( "a" << 1 ); - theDataFileMgr.insert( ns(), one ); + theDataFileMgr.insertWithObjMod( ns(), one ); BSONObj result; ASSERT( Helpers::findOne( ns(), BSON( "a" << 1 ), result ) ); ASSERT_EXCEPTION( Helpers::findOne( ns(), BSON( "a" << 1 ), result, true ), AssertionException ); @@ -990,10 +1374,10 @@ Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); for( int i = 0; i < 200; ++i ) { BSONObj two = BSON( "a" << 2 ); - theDataFileMgr.insert( ns(), two ); + theDataFileMgr.insertWithObjMod( ns(), two ); } BSONObj one = BSON( "a" << 1 ); - theDataFileMgr.insert( ns(), one ); + theDataFileMgr.insertWithObjMod( ns(), one ); deleteObjects( ns(), BSON( "a" << 1 ), false ); ASSERT( BSON( "a" << 1 ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( FieldRangeSet( ns(), BSON( "a" << 1 ) ).pattern() ) ) == 0 ); ASSERT_EQUALS( 2, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( FieldRangeSet( ns(), BSON( "a" << 1 ) ).pattern() ) ); @@ -1007,11 +1391,11 @@ BSONObj one = BSON( "_id" << 3 << "a" << 1 ); BSONObj two = BSON( "_id" << 2 << "a" << 1 ); BSONObj three = BSON( "_id" << 1 << "a" << -1 ); - theDataFileMgr.insert( 
ns(), one ); - theDataFileMgr.insert( ns(), two ); - theDataFileMgr.insert( ns(), three ); + theDataFileMgr.insertWithObjMod( ns(), one ); + theDataFileMgr.insertWithObjMod( ns(), two ); + theDataFileMgr.insertWithObjMod( ns(), three ); deleteObjects( ns(), BSON( "_id" << GT << 0 << "a" << GT << 0 ), true ); - for( auto_ptr< Cursor > c = theDataFileMgr.findAll( ns() ); c->ok(); c->advance() ) + for( boost::shared_ptr c = theDataFileMgr.findAll( ns() ); c->ok(); c->advance() ) ASSERT( 3 != c->current().getIntField( "_id" ) ); } }; @@ -1023,11 +1407,11 @@ BSONObj one = BSON( "a" << 2 << "_id" << 0 ); BSONObj two = BSON( "a" << 1 << "_id" << 1 ); BSONObj three = BSON( "a" << 0 << "_id" << 2 ); - theDataFileMgr.insert( ns(), one ); - theDataFileMgr.insert( ns(), two ); - theDataFileMgr.insert( ns(), three ); + theDataFileMgr.insertWithObjMod( ns(), one ); + theDataFileMgr.insertWithObjMod( ns(), two ); + theDataFileMgr.insertWithObjMod( ns(), three ); deleteObjects( ns(), BSON( "a" << GTE << 0 << "_id" << GT << 0 ), true ); - for( auto_ptr< Cursor > c = theDataFileMgr.findAll( ns() ); c->ok(); c->advance() ) + for( boost::shared_ptr c = theDataFileMgr.findAll( ns() ); c->ok(); c->advance() ) ASSERT( 2 != c->current().getIntField( "_id" ) ); } }; @@ -1039,7 +1423,7 @@ for( int i = 0; i < 100; ++i ) { for( int j = 0; j < 2; ++j ) { BSONObj temp = BSON( "a" << 100 - i - 1 << "b" << i ); - theDataFileMgr.insert( ns(), temp ); + theDataFileMgr.insertWithObjMod( ns(), temp ); } } Message m; @@ -1071,13 +1455,14 @@ Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); for( int i = 0; i < 10; ++i ) { BSONObj temp = BSON( "a" << i ); - theDataFileMgr.insert( ns(), temp ); + theDataFileMgr.insertWithObjMod( ns(), temp ); } BSONObj hint = fromjson( "{$hint:{a:1}}" ); BSONElement hintElt = hint.firstElement(); - QueryPlanSet s( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), BSONObj() ); - auto_ptr< Cursor > c = qp.newCursor(); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ) ) ); + QueryPlanSet s( ns(), frs, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); + QueryPlan qp( nsd(), 1, s.fbs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + boost::shared_ptr c = qp.newCursor(); double expected[] = { 2, 3, 6, 9 }; for( int i = 0; i < 4; ++i, c->advance() ) { ASSERT_EQUALS( expected[ i ], c->current().getField( "a" ).number() ); @@ -1086,9 +1471,10 @@ // now check reverse { - QueryPlanSet s( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), BSON( "a" << -1 ) ); - auto_ptr< Cursor > c = qp.newCursor(); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ) ) ); + QueryPlanSet s( ns(), frs, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ), &hintElt ); + QueryPlan qp( nsd(), 1, s.fbs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ) ); + boost::shared_ptr c = qp.newCursor(); double expected[] = { 9, 6, 3, 2 }; for( int i = 0; i < 4; ++i, c->advance() ) { ASSERT_EQUALS( expected[ i ], c->current().getField( "a" ).number() ); @@ -1104,15 +1490,17 @@ Helpers::ensureIndex( ns(), BSON( "a" << 1 << "b" << 1 ), false, "a_1_b_1" ); for( int i = 0; i < 10; ++i ) { BSONObj temp = BSON( "a" << 5 << "b" << i ); - theDataFileMgr.insert( ns(), temp ); + theDataFileMgr.insertWithObjMod( ns(), temp ); } BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); BSONElement hintElt = 
hint.firstElement(); - QueryPlanSet s( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), BSONObj() ); - auto_ptr< Cursor > c = qp.newCursor(); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) ); + QueryPlan qp( nsd(), 1, *frs, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + boost::shared_ptr c = qp.newCursor(); double expected[] = { 2, 3, 6, 9 }; + ASSERT( c->ok() ); for( int i = 0; i < 4; ++i, c->advance() ) { + ASSERT( c->ok() ); ASSERT_EQUALS( expected[ i ], c->current().getField( "b" ).number() ); } ASSERT( !c->ok() ); @@ -1125,15 +1513,16 @@ Helpers::ensureIndex( ns(), BSON( "a" << 1 << "b" << 1 ), false, "a_1_b_1" ); for( int i = 0; i < 10; ++i ) { BSONObj temp = BSON( "a" << 5 << "b" << i ); - theDataFileMgr.insert( ns(), temp ); + theDataFileMgr.insertWithObjMod( ns(), temp ); } BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); BSONElement hintElt = hint.firstElement(); - QueryPlanSet s( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), BSONObj() ); - auto_ptr< Cursor > c = qp.newCursor(); - for( int i = 2; i < 10; ++i, c->advance() ) { - ASSERT_EQUALS( i, c->current().getField( "b" ).number() ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ) ) ); + QueryPlan qp( nsd(), 1, *frs, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + boost::shared_ptr c = qp.newCursor(); + int matches[] = { 2, 3, 6, 9 }; + for( int i = 0; i < 4; ++i, c->advance() ) { + ASSERT_EQUALS( matches[ i ], c->current().getField( "b" ).number() ); } ASSERT( !c->ok() ); } @@ -1141,6 +1530,55 @@ } // namespace QueryPlanSetTests + class Base { + public: + Base() : _ctx( ns() ) { + string err; + userCreateNS( ns(), BSONObj(), err, false ); + } + ~Base() { + if ( !nsd() ) + return; + string s( ns() ); + dropNS( s ); + } + protected: + static const char *ns() { return "unittests.BaseTests"; } + static NamespaceDetails *nsd() { return nsdetails( ns() ); } + private: + dblock lk_; + Client::Context _ctx; + }; + + class BestGuess : public Base { + public: + void run() { + Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); + Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); + BSONObj temp = BSON( "a" << 1 ); + theDataFileMgr.insertWithObjMod( ns(), temp ); + temp = BSON( "b" << 1 ); + theDataFileMgr.insertWithObjMod( ns(), temp ); + + boost::shared_ptr< Cursor > c = bestGuessCursor( ns(), BSON( "b" << 1 ), BSON( "a" << 1 ) ); + ASSERT_EQUALS( string( "a" ), c->indexKeyPattern().firstElement().fieldName() ); + c = bestGuessCursor( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ) ); + ASSERT_EQUALS( string( "b" ), c->indexKeyPattern().firstElement().fieldName() ); + boost::shared_ptr< MultiCursor > m = dynamic_pointer_cast< MultiCursor >( bestGuessCursor( ns(), fromjson( "{b:1,$or:[{z:1}]}" ), BSON( "a" << 1 ) ) ); + ASSERT_EQUALS( string( "a" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); + m = dynamic_pointer_cast< MultiCursor >( bestGuessCursor( ns(), fromjson( "{a:1,$or:[{y:1}]}" ), BSON( "b" << 1 ) ) ); + ASSERT_EQUALS( string( "b" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); + + FieldRangeSet frs( "ns", BSON( "a" << 1 ) ); + { + scoped_lock lk(NamespaceDetailsTransient::_qcMutex); + NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( frs.pattern( BSON( "b" << 1 ) ), BSON( "a" << 1 ), 0 ); + } + m = 
dynamic_pointer_cast< MultiCursor >( bestGuessCursor( ns(), fromjson( "{a:1,$or:[{y:1}]}" ), BSON( "b" << 1 ) ) ); + ASSERT_EQUALS( string( "b" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); + } + }; + class All : public Suite { public: All() : Suite( "queryoptimizer" ){} @@ -1169,6 +1607,72 @@ add< FieldRangeTests::InLowerBound >(); add< FieldRangeTests::InUpperBound >(); add< FieldRangeTests::MultiBound >(); + add< FieldRangeTests::Diff1 >(); + add< FieldRangeTests::Diff2 >(); + add< FieldRangeTests::Diff3 >(); + add< FieldRangeTests::Diff4 >(); + add< FieldRangeTests::Diff5 >(); + add< FieldRangeTests::Diff6 >(); + add< FieldRangeTests::Diff7 >(); + add< FieldRangeTests::Diff8 >(); + add< FieldRangeTests::Diff9 >(); + add< FieldRangeTests::Diff10 >(); + add< FieldRangeTests::Diff11 >(); + add< FieldRangeTests::Diff12 >(); + add< FieldRangeTests::Diff13 >(); + add< FieldRangeTests::Diff14 >(); + add< FieldRangeTests::Diff15 >(); + add< FieldRangeTests::Diff16 >(); + add< FieldRangeTests::Diff17 >(); + add< FieldRangeTests::Diff18 >(); + add< FieldRangeTests::Diff19 >(); + add< FieldRangeTests::Diff20 >(); + add< FieldRangeTests::Diff21 >(); + add< FieldRangeTests::Diff22 >(); + add< FieldRangeTests::Diff23 >(); + add< FieldRangeTests::Diff24 >(); + add< FieldRangeTests::Diff25 >(); + add< FieldRangeTests::Diff26 >(); + add< FieldRangeTests::Diff27 >(); + add< FieldRangeTests::Diff28 >(); + add< FieldRangeTests::Diff29 >(); + add< FieldRangeTests::Diff30 >(); + add< FieldRangeTests::Diff31 >(); + add< FieldRangeTests::Diff32 >(); + add< FieldRangeTests::Diff33 >(); + add< FieldRangeTests::Diff34 >(); + add< FieldRangeTests::Diff35 >(); + add< FieldRangeTests::Diff36 >(); + add< FieldRangeTests::Diff37 >(); + add< FieldRangeTests::Diff38 >(); + add< FieldRangeTests::Diff39 >(); + add< FieldRangeTests::Diff40 >(); + add< FieldRangeTests::Diff41 >(); + add< FieldRangeTests::Diff42 >(); + add< FieldRangeTests::Diff43 >(); + add< FieldRangeTests::Diff44 >(); + add< FieldRangeTests::Diff45 >(); + add< FieldRangeTests::Diff46 >(); + add< FieldRangeTests::Diff47 >(); + add< FieldRangeTests::Diff48 >(); + add< FieldRangeTests::Diff49 >(); + add< FieldRangeTests::Diff50 >(); + add< FieldRangeTests::Diff51 >(); + add< FieldRangeTests::Diff52 >(); + add< FieldRangeTests::Diff53 >(); + add< FieldRangeTests::Diff54 >(); + add< FieldRangeTests::Diff55 >(); + add< FieldRangeTests::Diff56 >(); + add< FieldRangeTests::Diff57 >(); + add< FieldRangeTests::Diff58 >(); + add< FieldRangeTests::Diff59 >(); + add< FieldRangeTests::Diff60 >(); + add< FieldRangeTests::Diff61 >(); + add< FieldRangeTests::Diff62 >(); + add< FieldRangeTests::Diff63 >(); + add< FieldRangeTests::DiffMulti1 >(); + add< FieldRangeTests::DiffMulti2 >(); + add< FieldRangeTests::SetIntersect >(); add< QueryPlanTests::NoIndex >(); add< QueryPlanTests::SimpleOrder >(); add< QueryPlanTests::MoreIndexThanNeeded >(); @@ -1206,6 +1710,7 @@ add< QueryPlanSetTests::InQueryIntervals >(); add< QueryPlanSetTests::EqualityThenIn >(); add< QueryPlanSetTests::NotEqualityThenIn >(); + add< BestGuess >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/querytests.cpp mongodb-1.6.3/dbtests/querytests.cpp --- mongodb-1.4.4/dbtests/querytests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/querytests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "../db/query.h" #include "../db/db.h" @@ -42,7 +42,7 @@ } ~Base() { try { - auto_ptr< Cursor > c = theDataFileMgr.findAll( ns() ); + boost::shared_ptr c = theDataFileMgr.findAll( ns() ); vector< DiskLoc > toDelete; for(; c->ok(); c->advance() ) toDelete.push_back( c->currLoc() ); @@ -58,7 +58,7 @@ } static void addIndex( const BSONObj &key ) { BSONObjBuilder b; - b.append( "name", "index" ); + b.append( "name", key.firstElement().fieldName() ); b.append( "ns", ns() ); b.append( "key", key ); BSONObj o = b.done(); @@ -129,6 +129,19 @@ ASSERT_EQUALS( 1, runCount( ns(), cmd, err ) ); } }; + + class FindOne : public Base { + public: + void run() { + addIndex( BSON( "b" << 1 ) ); + addIndex( BSON( "c" << 1 ) ); + insert( BSON( "b" << 2 << "_id" << 0 ) ); + insert( BSON( "c" << 3 << "_id" << 1 ) ); + BSONObj ret; + ASSERT( Helpers::findOne( ns(), fromjson( "{$or:[{b:2},{c:3}]}" ), ret, true ) ); + ASSERT_EQUALS( string( "b" ), ret.firstElement().fieldName() ); + } + }; class ClientBase { public: @@ -476,6 +489,20 @@ } }; + class EmbeddedNumericTypes : public ClientBase { + public: + ~EmbeddedNumericTypes() { + client().dropCollection( "unittests.querytests.NumericEmbedded" ); + } + void run() { + const char *ns = "unittests.querytests.NumericEmbedded"; + client().insert( ns, BSON( "a" << BSON ( "b" << 1 ) ) ); + ASSERT( ! client().findOne( ns, BSON( "a" << BSON ( "b" << 1.0 ) ) ).isEmpty() ); + client().ensureIndex( ns , BSON( "a" << 1 ) ); + ASSERT( ! client().findOne( ns, BSON( "a" << BSON ( "b" << 1.0 ) ) ).isEmpty() ); + } + }; + class AutoResetIndexCache : public ClientBase { public: ~AutoResetIndexCache() { @@ -736,7 +763,8 @@ auto_ptr< DBClientCursor > cursor = client().query( ns, Query().sort( "7" ) ); while ( cursor->more() ){ BSONObj o = cursor->next(); - cout << " foo " << o << endl; + assert( o.valid() ); + //cout << " foo " << o << endl; } } @@ -1059,6 +1087,45 @@ }; }; + namespace queryobjecttests { + class names1 { + public: + void run(){ + ASSERT_EQUALS( BSON( "x" << 1 ) , QUERY( "query" << BSON( "x" << 1 ) ).getFilter() ); + ASSERT_EQUALS( BSON( "x" << 1 ) , QUERY( "$query" << BSON( "x" << 1 ) ).getFilter() ); + } + + }; + } + + class OrderingTest { + public: + void run(){ + { + Ordering o = Ordering::make( BSON( "a" << 1 << "b" << -1 << "c" << 1 ) ); + ASSERT_EQUALS( 1 , o.get(0) ); + ASSERT_EQUALS( -1 , o.get(1) ); + ASSERT_EQUALS( 1 , o.get(2) ); + + ASSERT( ! o.descending( 1 ) ); + ASSERT( o.descending( 1 << 1 ) ); + ASSERT( ! o.descending( 1 << 2 ) ); + } + + { + Ordering o = Ordering::make( BSON( "a.d" << 1 << "a" << 1 << "e" << -1 ) ); + ASSERT_EQUALS( 1 , o.get(0) ); + ASSERT_EQUALS( 1 , o.get(1) ); + ASSERT_EQUALS( -1 , o.get(2) ); + + ASSERT( ! o.descending( 1 ) ); + ASSERT( ! 
o.descending( 1 << 1 ) ); + ASSERT( o.descending( 1 << 2 ) ); + } + + } + }; + class All : public Suite { public: All() : Suite( "query" ) { @@ -1070,6 +1137,7 @@ add< CountFields >(); add< CountQueryFields >(); add< CountIndexedRegex >(); + add< FindOne >(); add< BoundedKey >(); add< GetMore >(); add< PositiveLimit >(); @@ -1086,6 +1154,7 @@ add< EmptyFieldSpec >(); add< MultiNe >(); add< EmbeddedNe >(); + add< EmbeddedNumericTypes >(); add< AutoResetIndexCache >(); add< UniqueIndex >(); add< UniqueIndexPreexistingData >(); @@ -1107,8 +1176,12 @@ add< FindingStart >(); add< FindingStartPartiallyFull >(); add< WhatsMyUri >(); - + add< parsedtests::basic1 >(); + + add< queryobjecttests::names1 >(); + + add< OrderingTest >(); } } myall; diff -Nru mongodb-1.4.4/dbtests/repltests.cpp mongodb-1.6.3/dbtests/repltests.cpp --- mongodb-1.4.4/dbtests/repltests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/repltests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../db/repl.h" #include "../db/db.h" @@ -89,7 +89,7 @@ int count = 0; dblock lk; Client::Context ctx( ns() ); - auto_ptr< Cursor > c = theDataFileMgr.findAll( ns() ); + boost::shared_ptr c = theDataFileMgr.findAll( ns() ); for(; c->ok(); c->advance(), ++count ) { // cout << "obj: " << c->current().toString() << endl; } @@ -99,7 +99,7 @@ dblock lk; Client::Context ctx( cllNS() ); int count = 0; - for( auto_ptr< Cursor > c = theDataFileMgr.findAll( cllNS() ); c->ok(); c->advance() ) + for( boost::shared_ptr c = theDataFileMgr.findAll( cllNS() ); c->ok(); c->advance() ) ++count; return count; } @@ -114,7 +114,7 @@ vector< BSONObj > ops; { Client::Context ctx( cllNS() ); - for( auto_ptr< Cursor > c = theDataFileMgr.findAll( cllNS() ); c->ok(); c->advance() ) + for( boost::shared_ptr c = theDataFileMgr.findAll( cllNS() ); c->ok(); c->advance() ) ops.push_back( c->current() ); } { @@ -126,7 +126,7 @@ static void printAll( const char *ns ) { dblock lk; Client::Context ctx( ns ); - auto_ptr< Cursor > c = theDataFileMgr.findAll( ns ); + boost::shared_ptr c = theDataFileMgr.findAll( ns ); vector< DiskLoc > toDelete; out() << "all for " << ns << endl; for(; c->ok(); c->advance() ) { @@ -137,7 +137,7 @@ static void deleteAll( const char *ns ) { dblock lk; Client::Context ctx( ns ); - auto_ptr< Cursor > c = theDataFileMgr.findAll( ns ); + boost::shared_ptr c = theDataFileMgr.findAll( ns ); vector< DiskLoc > toDelete; for(; c->ok(); c->advance() ) { toDelete.push_back( c->currLoc() ); @@ -387,6 +387,29 @@ } }; + class UpdateId2 : public ReplTests::Base { + public: + UpdateId2() : + o_( fromjson( "{'_id':1}" ) ), + u_( fromjson( "{'_id':2}" ) ){} + void run() { + deleteAll( ns() ); + insert( o_ ); + client()->update( ns(), o_, u_ ); + ASSERT_EQUALS( 1, count() ); + checkOne( u_ ); + + deleteAll( ns() ); + insert( o_ ); + insert( u_ ); // simulate non snapshot replication, then op application + applyAllOperations(); + ASSERT_EQUALS( 1, count() ); + checkOne( u_ ); + } + protected: + BSONObj o_, u_; + }; + class UpdateDifferentFieldExplicitId : public Base { public: UpdateDifferentFieldExplicitId() : @@ -1085,6 +1108,7 @@ add< Idempotence::UpdateSameFieldWithId >(); add< Idempotence::UpdateSameFieldExplicitId >(); add< Idempotence::UpdateId >(); + add< Idempotence::UpdateId2 >(); add< Idempotence::UpdateDifferentFieldExplicitId >(); add< Idempotence::UpsertUpdateNoMods >(); add< Idempotence::UpsertInsertNoMods >(); diff -Nru 
mongodb-1.4.4/dbtests/sharding.cpp mongodb-1.6.3/dbtests/sharding.cpp --- mongodb-1.4.4/dbtests/sharding.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/sharding.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "dbtests.h" diff -Nru mongodb-1.4.4/dbtests/socktests.cpp mongodb-1.6.3/dbtests/socktests.cpp --- mongodb-1.4.4/dbtests/socktests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/socktests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#include "stdafx.h" +#include "pch.h" #include "../util/sock.h" #include "dbtests.h" @@ -29,6 +29,7 @@ void run() { ASSERT_EQUALS( "127.0.0.1", hostbyname( "localhost" ) ); ASSERT_EQUALS( "127.0.0.1", hostbyname( "127.0.0.1" ) ); + // ASSERT_EQUALS( "::1", hostbyname( "::1" ) ); // IPv6 disabled at runtime by default. } }; diff -Nru mongodb-1.4.4/dbtests/spin_lock_test.cpp mongodb-1.6.3/dbtests/spin_lock_test.cpp --- mongodb-1.4.4/dbtests/spin_lock_test.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/dbtests/spin_lock_test.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,115 @@ +// spin_lock_test.cpp : spin_lock.{h, cpp} unit test + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "../pch.h" +#include + +#include "dbtests.h" +#include "../util/concurrency/spin_lock.h" + +namespace { + + using mongo::SpinLock; + + class LockTester{ + public: + LockTester( SpinLock* spin, int* counter ) + : _spin(spin), _counter(counter), _requests(0), _t(NULL){} + + ~LockTester(){ + delete _t; + } + + void start( int increments ){ + _t = new boost::thread( boost::bind(&LockTester::test, this, increments) ); + } + + void join(){ + if ( _t ) _t->join(); + } + + int requests() const{ + return _requests; + } + + private: + SpinLock* _spin; // not owned here + int* _counter; // not owned here + int _requests; + boost::thread* _t; + + void test( int increments ){ + while ( increments-- > 0 ) { + _spin->lock(); + ++(*_counter); + ++_requests; + _spin->unlock(); + } + } + + LockTester( LockTester& ); + LockTester& operator=( LockTester& ); + }; + + class ConcurrentIncs{ + public: + void run(){ + +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + SpinLock spin; + int counter = 0; + + const int threads = 64; + const int incs = 10000; + LockTester* testers[threads]; + + for ( int i = 0; i < threads; i++ ){ + testers[i] = new LockTester( &spin, &counter ); + } + for ( int i = 0; i < threads; i++ ){ + testers[i]->start( incs ); + } + for ( int i = 0; i < threads; i++ ){ + testers[i]->join(); + ASSERT_EQUALS( testers[i]->requests(), incs ); + delete testers[i]; + } + + ASSERT_EQUALS( counter, threads*incs ); +#else + + // WARNING "TODO Missing spin lock in this platform."
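
The spin_lock_test.cpp file added above drives mongo::SpinLock from 64 threads, each taking the lock 10,000 times, and checks that no increment is lost; on platforms lacking GCC's 4-byte compare-and-swap intrinsic, this #else branch degenerates to the no-op assertion just below. For readers without util/concurrency/spin_lock.h at hand, a lock of the kind being hammered can be sketched as follows -- ToySpinLock is a hypothetical stand-in built on the same GCC intrinsic family the guard names, not the real implementation:

class ToySpinLock {
public:
    ToySpinLock() : _locked( 0 ) {}
    void lock() {
        // spin until we atomically swap 0 -> 1; exactly one thread wins at a time
        while ( !__sync_bool_compare_and_swap( &_locked, 0, 1 ) ) {
            // busy-wait; a production lock would back off or yield here
        }
    }
    void unlock() {
        __sync_lock_release( &_locked );   // atomic store of 0 with release semantics
    }
private:
    volatile int _locked;
    ToySpinLock( ToySpinLock& );              // non-copyable, mirroring LockTester
    ToySpinLock& operator=( ToySpinLock& );
};
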
+ ASSERT( true ); + + +#endif + + } + }; + + class SpinLockSuite : public Suite{ + public: + SpinLockSuite() : Suite( "spinlock" ){} + + void setupTests(){ + add< ConcurrentIncs >(); + } + } spinLockSuite; + +} // anonymous namespace diff -Nru mongodb-1.4.4/dbtests/test.vcproj mongodb-1.6.3/dbtests/test.vcproj --- mongodb-1.4.4/dbtests/test.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/test.vcproj 2010-09-24 10:02:42.000000000 -0700 @@ -43,12 +43,13 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -372,22 +214,6 @@ UsePrecompiledHeader="0" /> - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1254,10 +712,6 @@ > - - @@ -1290,168 +744,48 @@ > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + @@ -1563,6 +901,10 @@ > + + @@ -1575,47 +917,155 @@ > - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1628,6 +1078,10 @@ > + + @@ -1714,22 +1168,6 @@ PrecompiledHeaderThrough="" /> - - - - - - + + @@ -1768,6 +1210,34 @@ > + + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mongodb-1.4.4/dbtests/test.vcxproj mongodb-1.6.3/dbtests/test.vcxproj --- mongodb-1.4.4/dbtests/test.vcxproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/test.vcxproj 2010-09-24 10:02:42.000000000 -0700 @@ -1,22 +1,22 @@  - - Debug Recstore - Win32 - Debug Win32 - - release_nojni - Win32 + + Debug + x64 Release Win32 + + Release + x64 + {215B2D68-0A70-4D10-8E75-B33010C62A91} @@ -24,23 +24,23 @@ Win32Proj - + Application - false - false Unicode + true - + Application Unicode true - + Application + false + false Unicode - true - + Application false false @@ -49,50 +49,64 @@ - - - - - - - + + + + - + + + + - <_ProjectFileVersion>10.0.21006.1 + <_ProjectFileVersion>10.0.30319.1 $(SolutionDir)$(Configuration)\ + $(SolutionDir)$(Configuration)\ $(Configuration)\ + $(Configuration)\ true + true $(SolutionDir)$(Configuration)\ + $(SolutionDir)$(Configuration)\ $(Configuration)\ + $(Configuration)\ false - $(SolutionDir)$(Configuration)\ - $(Configuration)\ - false - $(SolutionDir)$(Configuration)\ - $(Configuration)\ - true + false + AllRules.ruleset + AllRules.ruleset + + + + + AllRules.ruleset + AllRules.ruleset + + + + Disabled - ..\..\js\src;..\pcre-7.4;c:\Program Files\boost\boost_1_41_0;%(AdditionalIncludeDirectories) - OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) - true + ..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories) + _UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) + No EnableFastChecks MultiThreadedDebugDLL Use + pch.h Level3 EditAndContinue 4355;4800;%(DisableSpecificWarnings) + true - ws2_32.lib;%(AdditionalDependencies) - c:\Program Files\boost\boost_1_41_0\lib;%(AdditionalLibraryDirectories) + 
ws2_32.lib;Psapi.lib;%(AdditionalDependencies) + c:\boost\lib\vs2010_32;\boost\lib\vs2010_32;\boost\lib false %(IgnoreSpecificDefaultLibraries) true @@ -100,47 +114,49 @@ MachineX86 - + - MaxSpeed - true - ..\..\js\src;..\pcre-7.4;c:\Program Files\boost\boost_1_41_0;%(AdditionalIncludeDirectories) - OLDJS;STATIC_JS_API;XP_WIN;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) - MultiThreaded - true + Disabled + ..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories) + _UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebugDLL Use - stdafx.h + pch.h Level3 ProgramDatabase - 4355;4800;%(DisableSpecificWarnings) + 4355;4800;4267;4244;%(DisableSpecificWarnings) + No + true - ws2_32.lib;%(AdditionalDependencies) - c:\program files\boost\boost_1_41_0\lib;%(AdditionalLibraryDirectories) + ws2_32.lib;Psapi.lib;%(AdditionalDependencies) + c:\boost\lib\vs2010_64;\boost\lib\vs2010_64;\boost\lib + false + %(IgnoreSpecificDefaultLibraries) true Console - true - true - MachineX86 - + MaxSpeed true - ..\pcre-7.4;c:\Program Files\boost\boost_1_41_0;c:\program files\java\jdk\include;c:\program files\java\jdk\include\win32;%(AdditionalIncludeDirectories) - NOJNI;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) - MultiThreadedDLL + ..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories) + _UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) + MultiThreaded true Use - stdafx.h + pch.h Level3 ProgramDatabase 4355;4800;%(DisableSpecificWarnings) + No + true - ws2_32.lib;%(AdditionalDependencies) - c:\program files\boost\boost_1_41_0\lib;%(AdditionalLibraryDirectories) + ws2_32.lib;psapi.lib;%(AdditionalDependencies) + c:\boost\lib\vs2010_32;\boost\lib\vs2010_32;\boost\lib true Console true @@ -148,32 +164,36 @@ MachineX86 - + - Disabled - ..\..\js\src;..\pcre-7.4;c:\Program Files\boost\boost_1_41_0;%(AdditionalIncludeDirectories) - _RECSTORE;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebugDLL + MaxSpeed + true + ..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories) + _UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) + MultiThreaded + true Use + pch.h Level3 - EditAndContinue - 4355;4800;%(DisableSpecificWarnings) + ProgramDatabase + 4355;4800;4267;4244;%(DisableSpecificWarnings) + No + true - ws2_32.lib;%(AdditionalDependencies) - c:\Program Files\boost\boost_1_41_0\lib;%(AdditionalLibraryDirectories) - false - %(IgnoreSpecificDefaultLibraries) + ws2_32.lib;psapi.lib;%(AdditionalDependencies) + c:\boost\lib\vs2010_64;\boost\lib\vs2010_64;\boost\lib true Console - MachineX86 + true + true + + @@ -196,6 +216,8 @@ + + @@ -203,6 +225,7 @@ + @@ -212,8 +235,10 @@ - + + + @@ -228,336 +253,372 @@ - - - - - Document - true - true - - - Document - true - - - + + + + + + + + + + + + + + + + + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + 
+ - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - + - + + Create + Create + Create + Create + + + - + + + + + + + + + + + + + + + + + + + - - Create - Create - Create - Create - + + + + + - - + + + + - - + + + - + + + - - + + + + + + @@ -575,11 +636,47 @@ + + 4180;%(DisableSpecificWarnings) + 4180;%(DisableSpecificWarnings) + + + + + + + + + + Document + true + true + true + + + Document + true + true + true + + + Document + true + true + true + + + Document + true + true + true + + diff -Nru mongodb-1.4.4/dbtests/test.vcxproj.filters mongodb-1.6.3/dbtests/test.vcxproj.filters --- mongodb-1.4.4/dbtests/test.vcxproj.filters 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/dbtests/test.vcxproj.filters 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,707 @@ + + + + + {17c97725-06a4-41a6-bc1c-f0e05eada682} + + + {0a50fb63-4ac3-4e30-a9d4-b0841878ee73} + + + {eb2684bf-ca8d-4162-9313-56a81233c471} + + + {45dab36c-864e-45de-bb8e-cf1d87a2c4f6} + + + {69e233b0-5354-4612-8474-d4e4faaee607} + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {f86d2fc9-fb76-40cf-943d-330feb945ff3} + + + {0ec2e082-aace-46da-9898-a1a7b24d60b7} + + + {12efa241-3593-4177-a0cb-1eb672491f49} + + + {3865c5a5-bdb1-4420-a3ae-5a6615d563d4} + + + {28893dc5-8a18-429a-b5c9-2cf701d324da} + + + {bc08b47a-daa3-4894-b9af-ae88755838db} + + + {2b914dc3-a760-4397-a12b-73a0381fa71d} + + + {9320a670-3b28-471a-bf92-6c8d881a37a4} + + + {4fff2dbf-30c4-4295-8db8-d513c1e36220} + + + {d499fdba-b256-4b12-af20-cdd1ae1addff} + + + {353b6f01-1cab-4156-a576-bc75ab204776} + + + + + misc and third party + + + misc and third party + + + misc and third party + + + misc and third party + + + misc and third party\pcre + + + misc and third party\pcre + + + storage related + + + storage related + + + storage related + + + storage related + + + storage related + + + client + + + client + + + client + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + btree related + + + util\concurrency + + + util\concurrency + + + util\concurrency + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + util\h + + + + + misc and third party + + + misc and third party + + + misc and third party + + + misc and third party + + + + + misc and third party + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + misc and third party\pcre + + + storage related + + + client + + + client + + + client + + + 
client + + + db + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\h + + + db\h + + + db\h + + + db\h + + + db\h + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + util\cpp + + + shard + + + scripting + + + scripting + + + scripting + + + scripting + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + dbtests + + + stats + + + stats + + + stats + + + replsets + + + replsets + + + replsets + + + replsets + + + btree related + + + btree related + + + db\cpp + + + replsets + + + util\concurrency + + + util\concurrency + + + replsets + + + shard + + + shard + + + util\concurrency + + + db\cpp + + + replsets + + + util\cpp + + + db\cpp + + + shard + + + shard + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + client + + + db\cpp + + + db\cpp + + + replsets + + + replsets + + + replsets + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + + + misc and third party + + + \ No newline at end of file diff -Nru mongodb-1.4.4/dbtests/threadedtests.cpp mongodb-1.6.3/dbtests/threadedtests.cpp --- mongodb-1.4.4/dbtests/threadedtests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/threadedtests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,10 +17,10 @@ * along with this program. If not, see . */ -#include "stdafx.h" -#include "../util/atomic_int.h" -#include "../util/mvar.h" -#include "../util/thread_pool.h" +#include "pch.h" +#include "../bson/util/atomic_int.h" +#include "../util/concurrency/mvar.h" +#include "../util/concurrency/thread_pool.h" #include #include @@ -129,6 +129,20 @@ } }; + class LockTest { + public: + void run(){ + // quick atomicint wrap test + // MSGID likely assumes this semantic + AtomicUInt counter = 0xffffffff; + counter++; + ASSERT( counter == 0 ); + + writelocktry lk( "" , 0 ); + ASSERT( lk.got() ); + } + }; + class All : public Suite { public: All() : Suite( "threading" ){ @@ -138,6 +152,7 @@ add< IsAtomicUIntAtomic >(); add< MVarTest >(); add< ThreadPoolTest >(); + add< LockTest >(); } } myall; } diff -Nru mongodb-1.4.4/dbtests/updatetests.cpp mongodb-1.6.3/dbtests/updatetests.cpp --- mongodb-1.4.4/dbtests/updatetests.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/dbtests/updatetests.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ * along with this program. If not, see . 
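The LockTest added to threadedtests.cpp above relies on standard unsigned arithmetic: a 32-bit unsigned value wraps modulo 2^32, so incrementing 0xffffffff yields 0, which is why a message-id counter built on AtomicUInt can cycle safely. The same semantic in plain C++, without the atomic wrapper:

    unsigned counter = 0xffffffff; // largest 32-bit unsigned value
    counter++;                     // unsigned overflow is well defined: wraps to 0
    assert( counter == 0 );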
*/ -#include "stdafx.h" +#include "pch.h" #include "../db/query.h" #include "../db/db.h" @@ -678,6 +678,55 @@ }; + class inc3 : public SingleTest { + virtual BSONObj initial(){ + return BSON( "_id" << 1 << "x" << 537142123123LL ); + } + virtual BSONObj mod(){ + return BSON( "$inc" << BSON( "x" << 2 ) ); + } + virtual BSONObj after(){ + return BSON( "_id" << 1 << "x" << 537142123125LL ); + } + virtual const char * ns(){ + return "unittests.inc2"; + } + + }; + + class inc4 : public SingleTest { + virtual BSONObj initial(){ + return BSON( "_id" << 1 << "x" << 537142123123LL ); + } + virtual BSONObj mod(){ + return BSON( "$inc" << BSON( "x" << 2LL ) ); + } + virtual BSONObj after(){ + return BSON( "_id" << 1 << "x" << 537142123125LL ); + } + virtual const char * ns(){ + return "unittests.inc2"; + } + + }; + + class inc5 : public SingleTest { + virtual BSONObj initial(){ + return BSON( "_id" << 1 << "x" << 537142123123LL ); + } + virtual BSONObj mod(){ + return BSON( "$inc" << BSON( "x" << 2.0 ) ); + } + virtual BSONObj after(){ + return BSON( "_id" << 1 << "x" << 537142123125LL ); + } + virtual const char * ns(){ + return "unittests.inc2"; + } + + }; + + class bit1 : public Base { const char * ns(){ return "unittests.bit1"; @@ -775,6 +824,9 @@ add< basic::inc1 >(); add< basic::inc2 >(); + add< basic::inc3 >(); + add< basic::inc4 >(); + add< basic::inc5 >(); add< basic::bit1 >(); add< basic::unset >(); add< basic::setswitchint >(); diff -Nru mongodb-1.4.4/debian/changelog mongodb-1.6.3/debian/changelog --- mongodb-1.4.4/debian/changelog 2010-10-05 07:33:51.000000000 -0700 +++ mongodb-1.6.3/debian/changelog 2010-10-15 15:20:14.000000000 -0700 @@ -1,3 +1,59 @@ +mongodb (1:1.6.3-1ubuntu1) natty; urgency=low + + * Merge from debian unstable. (LP: #661513) Remaining changes: + - Providing a wrapper for xulrunner + - Moves all binaries under /usr/lib/mongodb + - adds xulwrapper script to mongodb-clients + - Sets up symlinks to xulwrapper in /usr/bin for all binaries in + /usr/lib/mongodb + - Adding dependency on xulrunner-1.9.2 to mongodb-clients as these + binaries require the library as well. 
+ - patched buildscripts/hacks_ubuntu.py: removing rpath + because wrapper is used + - Removing debian/mongodb-server.lintian -- rpath is no longer used + - Removing debian/mongodb-clients.lintian -- rpath is no longer used + + -- Clint Byrum Fri, 15 Oct 2010 15:19:37 -0700 + +mongodb (1:1.6.3-1) unstable; urgency=low + + * [0ad0c09] Imported Upstream version 1.6.3 + http://jira.mongodb.org/browse/SERVER/fixforversion/10190 + * replica_sets slavedelay, rollback + * sharding optimization for larger than ram data sets + + -- Antonin Kral Fri, 24 Sep 2010 19:04:25 +0200 + +mongodb (1:1.6.2-1) unstable; urgency=low + + * [90ef97f] Imported Upstream version 1.6.2 + http://jira.mongodb.org/browse/SERVER/fixforversion/10187 + + -- Antonin Kral Thu, 02 Sep 2010 17:41:00 +0200 + +mongodb (1:1.6.1-1) unstable; urgency=low + + * [3b9581e] Imported Upstream version 1.6.1 + http://jira.mongodb.org/browse/SERVER/fixforversion/10183 + * [5866afb] updated watch file fot 1.6.x version + + -- Antonin Kral Wed, 18 Aug 2010 09:33:36 +0200 + +mongodb (1:1.6.0-1) unstable; urgency=low + + * [7645618] Imported Upstream version 1.6.0 + http://jira.mongodb.org/browse/SERVER/fixforversion/10182 + * [7ae74af] install libs to /usr/lib not /usr/lib64 + * [45c6766] use prefix and smokedbprefix + + -- Antonin Kral Wed, 11 Aug 2010 13:19:25 +0200 + +mongodb (1:1.4.4-3) unstable; urgency=low + + * [1576744] added libreadline-dev to build-deps (Closes: #589163) + + -- Antonin Kral Thu, 15 Jul 2010 21:41:49 +0200 + mongodb (1:1.4.4-2ubuntu2) maverick; urgency=low * Providing a wrapper for xulrunner (LP: #557024) diff -Nru mongodb-1.4.4/debian/control mongodb-1.6.3/debian/control --- mongodb-1.4.4/debian/control 2010-10-03 22:52:24.000000000 -0700 +++ mongodb-1.6.3/debian/control 2010-10-15 12:09:39.000000000 -0700 @@ -3,8 +3,9 @@ Priority: optional Maintainer: Ubuntu Developers XSBC-Original-Maintainer: Antonin Kral -Build-Depends: debhelper (>= 7), libpcre3, libpcre3-dev, scons, xulrunner-dev (>= 1.9) | xulrunner-1.9-dev | xulrunner-1.9.1-dev, libboost1.42-dev | libboost1.40-dev | libboost1.35-dev | libboost1.37-dev | libboost1.38-dev, libboost-thread1.42-dev | libboost-thread1.40-dev | libboost-thread1.38-dev | libboost-thread1.37-dev | libboost-thread1.35-dev, libboost-filesystem1.42-dev | libboost-filesystem1.40-dev | libboost-filesystem1.38-dev | libboost-filesystem1.37-dev | libboost-filesystem1.35-dev, libboost-program-options1.42-dev | libboost-program-options1.40-dev | libboost-program-options1.38-dev | libboost-program-options1.37-dev | libboost-program-options1.35-dev, libboost-date-time1.42-dev | libboost-date-time1.40-dev | libboost-date-time1.38-dev | libboost-date-time1.37-dev | libboost-date-time1.35-dev -Standards-Version: 3.9.0 +Uploaders: Roberto C. 
Sanchez +Build-Depends: debhelper (>= 7), libpcre3, libpcre3-dev, scons, xulrunner-dev (>= 1.9) | xulrunner-1.9-dev | xulrunner-1.9.1-dev, libreadline-dev, libboost1.42-dev | libboost1.40-dev | libboost1.35-dev | libboost1.37-dev | libboost1.38-dev, libboost-thread1.42-dev | libboost-thread1.40-dev | libboost-thread1.38-dev | libboost-thread1.37-dev | libboost-thread1.35-dev, libboost-filesystem1.42-dev | libboost-filesystem1.40-dev | libboost-filesystem1.38-dev | libboost-filesystem1.37-dev | libboost-filesystem1.35-dev, libboost-program-options1.42-dev | libboost-program-options1.40-dev | libboost-program-options1.38-dev | libboost-program-options1.37-dev | libboost-program-options1.35-dev, libboost-date-time1.42-dev | libboost-date-time1.40-dev | libboost-date-time1.38-dev | libboost-date-time1.37-dev | libboost-date-time1.35-dev +Standards-Version: 3.9.1 Homepage: http://www.mongodb.org Package: mongodb diff -Nru mongodb-1.4.4/debian/mongo.1 mongodb-1.6.3/debian/mongo.1 --- mongodb-1.4.4/debian/mongo.1 2010-08-12 21:02:15.000000000 -0700 +++ mongodb-1.6.3/debian/mongo.1 2010-10-06 02:15:38.000000000 -0700 @@ -17,16 +17,16 @@ .B mongo start the shell, connecting to the server at localhost:27017 and using the test database .TP -.B mongod foo +.B mongo foo start the shell using the foo database at localhost:27017 .TP -.B mongod 192.169.0.5/foo +.B mongo 192.169.0.5/foo start the shell using the foo database at 192.169.0.5:27017 .TP -.B mongod 192.169.0.5:9999/foo +.B mongo 192.169.0.5:9999/foo start the shell using the foo database at 192.169.0.5:9999 .TP -.B mongod script1.js script2.js script3.js +.B mongo script1.js script2.js script3.js run three scripts and exit .SH "OPTIONS" .TP diff -Nru mongodb-1.4.4/debian/patches/debian-changes-1:1.4.4-2 mongodb-1.6.3/debian/patches/debian-changes-1:1.4.4-2 --- mongodb-1.4.4/debian/patches/debian-changes-1:1.4.4-2 2010-08-12 21:02:15.000000000 -0700 +++ mongodb-1.6.3/debian/patches/debian-changes-1:1.4.4-2 1969-12-31 16:00:00.000000000 -0800 @@ -1,66 +0,0 @@ -Description: Upstream changes introduced in version 1:1.4.4-2 - This patch has been created by dpkg-source during the package build. - Here's the last changelog entry, hopefully it gives details on why - those changes were made: - . - mongodb (1:1.4.4-2) unstable; urgency=low - . - * [3bd69dc] install libs to /usr/lib not /usr/lib64 (Closes: #588557) - . - The person named in the Author field signed this changelog entry. -Author: Antonin Kral -Bug-Debian: http://bugs.debian.org/588557 - ---- -The information above should follow the Patch Tagging Guidelines, please -checkout http://dep.debian.net/deps/dep3/ to learn about the format. 
Here -are templates for supplementary fields that you might want to add: - -Origin: , -Bug: -Bug-Debian: http://bugs.debian.org/ -Bug-Ubuntu: https://launchpad.net/bugs/ -Forwarded: -Reviewed-By: -Last-Update: - ---- mongodb-1.4.4.orig/SConstruct -+++ mongodb-1.4.4/SConstruct -@@ -510,7 +510,7 @@ elif "linux2" == os.sys.platform: - if os.uname()[4] == "x86_64" and not force32: - linux64 = True - javaVersion = "amd64" -- nixLibPrefix = "lib64" -+ nixLibPrefix = "lib" - env.Append( LIBPATH=["/usr/lib64" , "/lib64" ] ) - env.Append( LIBS=["pthread"] ) - -@@ -1229,15 +1229,15 @@ def ensureDir( name ): - Exit( 1 ) - - def ensureTestDirs(): -- ensureDir( "/tmp/unittest/" ) -- ensureDir( "/data/" ) -- ensureDir( "/data/db/" ) -+ ensureDir( installDir + "/tmp/unittest/" ) -+ ensureDir( installDir + "/data/" ) -+ ensureDir( installDir + "/data/db/" ) - - def testSetup( env , target , source ): - ensureTestDirs() - - if len( COMMAND_LINE_TARGETS ) == 1 and str( COMMAND_LINE_TARGETS[0] ) == "test": -- ensureDir( "/tmp/unittest/" ); -+ ensureDir( installDir + "/tmp/unittest/" ); - - addSmoketest( "smoke", [ add_exe( "test" ) ] , [ test[ 0 ].abspath ] ) - addSmoketest( "smokePerf", [ "perftest" ] , [ perftest[ 0 ].abspath ] ) -@@ -1315,7 +1315,7 @@ def startMongodWithArgs(*args): - mongodForTestsPort = "32000" - import os - ensureTestDirs() -- dirName = "/data/db/sconsTests/" -+ dirName = installDir + "/data/db/sconsTests/" - ensureDir( dirName ) - from subprocess import Popen - mongodForTests = Popen([mongod[0].abspath, "--port", mongodForTestsPort, diff -Nru mongodb-1.4.4/debian/patches/debian-changes-1:1.4.4-2ubuntu2 mongodb-1.6.3/debian/patches/debian-changes-1:1.4.4-2ubuntu2 --- mongodb-1.4.4/debian/patches/debian-changes-1:1.4.4-2ubuntu2 2010-10-05 07:37:58.000000000 -0700 +++ mongodb-1.6.3/debian/patches/debian-changes-1:1.4.4-2ubuntu2 1969-12-31 16:00:00.000000000 -0800 @@ -1,70 +0,0 @@ -Description: Upstream changes introduced in version 1:1.4.4-2ubuntu2 - This patch has been created by dpkg-source during the package build. - Here's the last changelog entry, hopefully it gives details on why - those changes were made: - . - mongodb (1:1.4.4-2ubuntu2) maverick; urgency=low - . - * Providing a wrapper for xulrunner (LP: #557024) - - Moves all binaries under /usr/lib/mongodb - - adds xulwrapper script to mongodb-clients - - Sets up symlinks to xulwrapper in /usr/bin for all binaries in - /usr/lib/mongodb - - Adding dependency on xulrunner-1.9.2 to mongodb-clients as these - binaries require the library as well. - - patched SConstruct + buildscripts/hacks_ubuntu.py: removing rpath - because wrapper is used - - Removing debian/mongodb-server.lintian -- rpath is no longer used - - Removing debian/mongodb-clients.lintian -- rpath is no longer used - . - The person named in the Author field signed this changelog entry. -Author: Clint Byrum -Bug-Ubuntu: https://bugs.launchpad.net/bugs/557024 - ---- -The information above should follow the Patch Tagging Guidelines, please -checkout http://dep.debian.net/deps/dep3/ to learn about the format. 
Here -are templates for supplementary fields that you might want to add: - -Origin: , -Bug: -Bug-Debian: http://bugs.debian.org/ -Bug-Ubuntu: https://launchpad.net/bugs/ -Forwarded: -Reviewed-By: -Last-Update: - ---- mongodb-1.4.4.orig/SConstruct -+++ mongodb-1.4.4/SConstruct -@@ -672,8 +672,8 @@ if not nojni and useJavaHome: - if not nojni: - javaLibs += [ "java" , "jvm" ] - -- env.Append( LINKFLAGS="-Xlinker -rpath -Xlinker " + javaHome + "jre/lib/" + javaVersion + "/server" ) -- env.Append( LINKFLAGS="-Xlinker -rpath -Xlinker " + javaHome + "jre/lib/" + javaVersion ) -+ env.Append( LINKFLAGS="-Xlinker -Xlinker " + javaHome + "jre/lib/" + javaVersion + "/server" ) -+ env.Append( LINKFLAGS="-Xlinker -Xlinker " + javaHome + "jre/lib/" + javaVersion ) - - if nix: - env.Append( CPPFLAGS="-fPIC -fno-strict-aliasing -ggdb -pthread -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch" ) ---- mongodb-1.4.4.orig/buildscripts/hacks_ubuntu.py -+++ mongodb-1.4.4/buildscripts/hacks_ubuntu.py -@@ -36,7 +36,6 @@ def foundxulrunner( env , options ): - return False - - env.Prepend( LIBPATH=[ libroot ] ) -- env.Prepend( RPATH=[ libroot ] ) - - env.Prepend( CPPPATH=[ incroot + "stable/" , - incroot + "unstable/" , -@@ -44,6 +43,10 @@ def foundxulrunner( env , options ): - env.Prepend( CPPPATH=[ "/usr/include/nspr/" ] ) - - env.Append( CPPDEFINES=[ "XULRUNNER" , "OLDJS" ] ) -+ -+ # Needed to run tests -+ env.AppendENVPath('LD_LIBRARY_PATH', libroot) -+ - if best.find( "1.9.0" ) >= 0 or best.endswith("1.9"): - if best.endswith( "1.9.1.9" ): - pass diff -Nru mongodb-1.4.4/debian/patches/debian-changes-1:1.6.3-1 mongodb-1.6.3/debian/patches/debian-changes-1:1.6.3-1 --- mongodb-1.4.4/debian/patches/debian-changes-1:1.6.3-1 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/debian/patches/debian-changes-1:1.6.3-1 2010-09-24 11:16:31.000000000 -0700 @@ -0,0 +1,39 @@ +Description: Upstream changes introduced in version 1:1.6.3-1 + This patch has been created by dpkg-source during the package build. + Here's the last changelog entry, hopefully it gives details on why + those changes were made: + . + mongodb (1:1.6.3-1) unstable; urgency=low + . + * [0ad0c09] Imported Upstream version 1.6.3 + http://jira.mongodb.org/browse/SERVER/fixforversion/10190 + * replica_sets slavedelay, rollback + * sharding optimization for larger than ram data sets + . + The person named in the Author field signed this changelog entry. +Author: Antonin Kral + +--- +The information above should follow the Patch Tagging Guidelines, please +checkout http://dep.debian.net/deps/dep3/ to learn about the format. 
Here +are templates for supplementary fields that you might want to add: + +Origin: , +Bug: +Bug-Debian: http://bugs.debian.org/ +Bug-Ubuntu: https://launchpad.net/bugs/ +Forwarded: +Reviewed-By: +Last-Update: + +--- mongodb-1.6.3.orig/SConstruct ++++ mongodb-1.6.3/SConstruct +@@ -553,7 +553,7 @@ elif "linux2" == os.sys.platform: + + if os.uname()[4] == "x86_64" and not force32: + linux64 = True +- nixLibPrefix = "lib64" ++ nixLibPrefix = "lib" + env.Append( LIBPATH=["/usr/lib64" , "/lib64" ] ) + env.Append( LIBS=["pthread"] ) + diff -Nru mongodb-1.4.4/debian/patches/debian-changes-1:1.6.3-1ubuntu1 mongodb-1.6.3/debian/patches/debian-changes-1:1.6.3-1ubuntu1 --- mongodb-1.4.4/debian/patches/debian-changes-1:1.6.3-1ubuntu1 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/debian/patches/debian-changes-1:1.6.3-1ubuntu1 2010-10-15 15:23:51.000000000 -0700 @@ -0,0 +1,61 @@ +Description: Upstream changes introduced in version 1:1.6.3-1ubuntu1 + This patch has been created by dpkg-source during the package build. + Here's the last changelog entry, hopefully it gives details on why + those changes were made: + . + mongodb (1:1.6.3-1ubuntu1) natty; urgency=low + . + * Merge from debian unstable. (LP: #661513) Remaining changes: + - Providing a wrapper for xulrunner + - Moves all binaries under /usr/lib/mongodb + - adds xulwrapper script to mongodb-clients + - Sets up symlinks to xulwrapper in /usr/bin for all binaries in + /usr/lib/mongodb + - Adding dependency on xulrunner-1.9.2 to mongodb-clients as these + binaries require the library as well. + - patched buildscripts/hacks_ubuntu.py: removing rpath + because wrapper is used + - Removing debian/mongodb-server.lintian -- rpath is no longer used + - Removing debian/mongodb-clients.lintian -- rpath is no longer used + . + The person named in the Author field signed this changelog entry. +Author: Clint Byrum +Bug-Ubuntu: https://bugs.launchpad.net/bugs/661513 + +--- +The information above should follow the Patch Tagging Guidelines, please +checkout http://dep.debian.net/deps/dep3/ to learn about the format. 
Here +are templates for supplementary fields that you might want to add: + +Origin: , +Bug: +Bug-Debian: http://bugs.debian.org/ +Bug-Ubuntu: https://launchpad.net/bugs/ +Forwarded: +Reviewed-By: +Last-Update: + +--- mongodb-1.6.3.orig/buildscripts/hacks_ubuntu.py ++++ mongodb-1.6.3/buildscripts/hacks_ubuntu.py +@@ -36,7 +36,6 @@ def foundxulrunner( env , options ): + return False + + env.Prepend( LIBPATH=[ libroot ] ) +- env.Prepend( RPATH=[ libroot ] ) + + env.Prepend( CPPPATH=[ incroot + "stable/" , + incroot + "unstable/" , +@@ -44,6 +43,13 @@ def foundxulrunner( env , options ): + env.Prepend( CPPPATH=[ "/usr/include/nspr/" ] ) + + env.Append( CPPDEFINES=[ "XULRUNNER" , "OLDJS" ] ) ++ ++ # Needed to run tests ++ env.AppendENVPath('LD_LIBRARY_PATH', libroot) ++ ++ # Needed to run tests ++ env.AppendENVPath('LD_LIBRARY_PATH', libroot) ++ + if best.find( "1.9.0" ) >= 0 or best.endswith("1.9"): + if best.endswith( "1.9.1.9" ): + pass diff -Nru mongodb-1.4.4/debian/patches/series mongodb-1.6.3/debian/patches/series --- mongodb-1.4.4/debian/patches/series 2010-10-05 07:37:58.000000000 -0700 +++ mongodb-1.6.3/debian/patches/series 2010-10-15 11:39:56.000000000 -0700 @@ -1,2 +1,2 @@ -debian-changes-1:1.4.4-2 -debian-changes-1:1.4.4-2ubuntu2 +debian-changes-1:1.6.3-1 +debian-changes-1:1.6.3-1ubuntu1 diff -Nru mongodb-1.4.4/debian/rules mongodb-1.6.3/debian/rules --- mongodb-1.4.4/debian/rules 2010-10-03 22:52:24.000000000 -0700 +++ mongodb-1.6.3/debian/rules 2010-10-06 02:15:38.000000000 -0700 @@ -11,7 +11,7 @@ # debug and nostrip is effectively equivalent for current version of scons # build system in current mongodb revision -DEB_SCONS_FLAGS := +DEB_SCONS_FLAGS := ifneq (,$(findstring debug,$(DEB_BUILD_OPTIONS))) DEB_SCONS_FLAGS := --d=DEBUGBUILD endif @@ -37,7 +37,7 @@ #docbook-to-man debian/mongodb.sgml > mongodb.1 #run regression tests - scons --prefix=$(CURDIR)/debian/tmp-test smoke + scons --smokedbprefix=$(CURDIR)/debian/tmp-test smoke rm -rf $(CURDIR)/debian/tmp-test touch $@ @@ -79,7 +79,7 @@ dh_prep dh_installdirs - scons --prefix=$(CURDIR)/debian/tmp/usr install + scons --prefix=$(CURDIR)/debian/tmp/usr --full install mkdir -p $(CURDIR)/debian/tmp/etc install -m 644 $(CURDIR)/debian/mongodb.conf \ diff -Nru mongodb-1.4.4/debian/watch mongodb-1.6.3/debian/watch --- mongodb-1.4.4/debian/watch 2010-08-12 21:02:15.000000000 -0700 +++ mongodb-1.6.3/debian/watch 2010-10-06 02:15:38.000000000 -0700 @@ -1,9 +1,7 @@ # Compulsory line, this is a version 3 file version=3 -# examine the content of the downloads page as the directory listing -# is not allowed anymore -# +# examination of the downloads page content as the directory listing is not allowed anymore # Forced regexp to match only to the current stable release http://www.mongodb.org/display/DOCS/Downloads \ - http://downloads\.mongodb\.org/src/mongodb-src-r(1\.4[\d\.]+)\.tar\.gz + http://.*mongodb\.org/src/mongodb-src-r(1\.6[\d\.]+)\.tar\.gz diff -Nru mongodb-1.4.4/distsrc/client/LICENSE.txt mongodb-1.6.3/distsrc/client/LICENSE.txt --- mongodb-1.4.4/distsrc/client/LICENSE.txt 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/distsrc/client/LICENSE.txt 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,203 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + diff -Nru mongodb-1.4.4/distsrc/client/SConstruct mongodb-1.6.3/distsrc/client/SConstruct --- mongodb-1.4.4/distsrc/client/SConstruct 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/distsrc/client/SConstruct 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,72 @@ + +import os + +AddOption( "--extrapath", + dest="extrapath", + type="string", + nargs=1, + action="store", + help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) + +env = Environment() + +def addExtraLibs( s ): + for x in s.split(","): + if os.path.exists( x ): + env.Append( CPPPATH=[ x + "/include" ] ) + env.Append( LIBPATH=[ x + "/lib" ] ) + env.Append( LIBPATH=[ x + "/lib64" ] ) + +if GetOption( "extrapath" ) is not None: + addExtraLibs( GetOption( "extrapath" ) ) + +env.Append( CPPPATH=[ "mongo/" ] ) + +env.Append( CPPDEFINES=[ "_SCONS" , "MONGO_EXPOSE_MACROS" ] ) + +nix = False +linux = False + +if "darwin" == os.sys.platform: + addExtraLibs( "/opt/local/" ) + nix = True +elif "linux2" == os.sys.platform: + nix = True + linux = True + +if nix: + env.Append( CPPFLAGS=" -O3" ) +if linux: + env.Append( LINKFLAGS=" -Wl,--as-needed -Wl,-zdefs " ) + +boostLibs = [ "thread" , "filesystem" , "system" ] +conf = Configure(env) +for lib in boostLibs: + if not conf.CheckLib("boost_%s-mt" % lib): + conf.CheckLib("boost_%s" % lib) +allClientFiles = [] +allClientFiles += Glob( "mongo/*.cpp" ) +allClientFiles += Glob( "mongo/client/*.cpp" ) +allClientFiles += Glob( "mongo/s/*.cpp" ) +allClientFiles += Glob( "mongo/shell/*.cpp" ) +allClientFiles += Glob( "mongo/db/*.cpp" ) +allClientFiles += Glob( "mongo/scripting/*.cpp" ) +allClientFiles += Glob( "mongo/util/*.cpp" ) +allClientFiles += Glob( "mongo/util/*.c" ) + +env.SharedLibrary( "mongoclient" , allClientFiles ) +env.Library( "mongoclient" , allClientFiles ) + +clientTests = [] +clientEnv = env.Clone(); +clientEnv.Prepend( LIBS=["libmongoclient.a"]) +clientEnv.Prepend( LIBPATH=["."] ) + +# examples +clientTests += [ clientEnv.Program( "firstExample" , [ "client/examples/first.cpp" ] ) ] +clientTests += [ clientEnv.Program( "secondExample" , [ "client/examples/second.cpp" ] ) ] +clientTests += [ clientEnv.Program( "whereExample" , [ "client/examples/whereExample.cpp" ] ) ] +clientTests += [ clientEnv.Program( "authTest" , [ "client/examples/authTest.cpp" ] ) ] +clientTests += [ clientEnv.Program( "httpClientTest" , [ "client/examples/httpClientTest.cpp" ] ) ] +clientTests += [ clientEnv.Program( "clientTest" , [ "client/examples/clientTest.cpp" ] ) ] +clientEnv.Alias("clientTests", clientTests, []) diff -Nru mongodb-1.4.4/distsrc/THIRD-PARTY-NOTICES mongodb-1.6.3/distsrc/THIRD-PARTY-NOTICES --- mongodb-1.4.4/distsrc/THIRD-PARTY-NOTICES 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/distsrc/THIRD-PARTY-NOTICES 2010-09-24 10:02:42.000000000 -0700 @@ -163,4 +163,29 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +4) License notice for Aladdin MD5 +--------------------------------- + +Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +L. Peter Deutsch +ghost@aladdin.com + End diff -Nru mongodb-1.4.4/docs/building.debian.etch.ec2.md mongodb-1.6.3/docs/building.debian.etch.ec2.md --- mongodb-1.4.4/docs/building.debian.etch.ec2.md 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/docs/building.debian.etch.ec2.md 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,7 @@ +Building on debian etch on ec2 +================ + ami-f2f6159b apt-get update diff -Nru mongodb-1.4.4/docs/building.md mongodb-1.6.3/docs/building.md --- mongodb-1.4.4/docs/building.md 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/docs/building.md 2010-09-24 10:02:42.000000000 -0700 @@ -2,64 +2,77 @@ Building MongoDB ================ -Scons ----------------- - - For detail information about building, please see: - http://www.mongodb.org/display/DOCS/Building +SCONS +--------------- + +For detail information about building, please see [the wiki](http://www.mongodb.org/display/DOCS/Building). - If you want to build everything (mongod, mongo, tools, etc): +If you want to build everything (mongod, mongo, tools, etc): - $ scons . + $ scons . - If you only want to build the database: +If you only want to build the database: - $ scons + $ scons + +To install - To install + $ scons --prefix=/opt/mongo install - $ scons --prefix=/opt/mongo install +Please note that prebuilt binaries are available on [mongodb.org](http://www.mongodb.org/downloads) and may be the easiest way to get started. - Please note that prebuilt binaries are available on mongodb.org and may be the easier way to get started. +SCONS TARGETS +-------------- -scons targets -------------- -* mongod -* mongos -* mongo -* mongoclient + * mongod + * mongos + * mongo + * mongoclient + * all -*general notes ---------------- - COMPILER VERSIONS +COMPILER VERSIONS +-------------- - Mongo has been tested with GCC 4.x and Visual Studio 2008. Older versions + Mongo has been tested with GCC 4.x and Visual Studio 2008 and 2010. Older versions of GCC may not be happy. -windows ---------------- +WINDOWS +-------------- - See also http://www.mongodb.org/display/DOCS/Building+for+Windows + See http://www.mongodb.org/display/DOCS/Building+for+Windows Build requirements: - vc++ express or visual studio - python 2.5 (for scons - 2.6 might be needed for some regression tests) - scons - boost 1.35 (or higher) - - windows sdk - tested with v6.0 v6.0a Or download a prebuilt binary for Windows at www.mongodb.org. -ubuntu +UBUNTU +-------------- + + scons libboost-dev libpcre++-dev xulrunner-1.9.1-dev + + +OS X +-------------- + +Try homebrew -- brew install mongodb. 
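The distsrc/client SConstruct shown earlier builds libmongoclient together with example programs (firstExample, clientTest, and so on). A minimal program in that vein -- a sketch assuming the 1.6-era C++ driver API (DBClientConnection, BSON, QUERY), not a copy of any bundled example:

    #include <iostream>
    #include <memory>
    #include "mongo/client/dbclient.h"
    using namespace mongo;

    int main() {
        DBClientConnection c;
        std::string errmsg;
        if ( !c.connect( "localhost", errmsg ) ) { // assumes a mongod on the default port
            std::cout << "connect failed: " << errmsg << std::endl;
            return 1;
        }
        c.insert( "test.people", BSON( "name" << "joe" << "age" << 33 ) );
        std::auto_ptr<DBClientCursor> cur = c.query( "test.people", QUERY( "age" << 33 ) );
        while ( cur->more() )
            std::cout << cur->next().toString() << std::endl;
        return 0;
    }

Linking against the libmongoclient built by that SConstruct (plus the checked boost_thread/filesystem/system libraries) is all such a program needs.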
+ + +FREEBSD -------------- - scons libboost-dev libpcre++-dev xulrunner-1.9.1-dev +Install the following ports: -FreeBSD + * devel/boost + * devel/libexecinfo + * devel/pcre + * lang/spidermonkey - Install the following ports: - - devel/boost - - devel/libexecinfo - - devel/pcre - - lang/spidermonkey +Special Build Notes +-------------- + * [debian etch on ec2](building.debian.etch.ec2.html) + * [open solaris on ec2](building.opensolaris.ec2.html) diff -Nru mongodb-1.4.4/docs/building.opensolaris.ec2.md mongodb-1.6.3/docs/building.opensolaris.ec2.md --- mongodb-1.4.4/docs/building.opensolaris.ec2.md 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/docs/building.opensolaris.ec2.md 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,17 @@ + +Building on open solaris on ec2 +================ + +ami-4133d528 + + +pkg install SUNWgcc +pkg install SUNWgit +pkg install SUNWpython-setuptools + +easy_install-2.4 scons + + +git clone git://github.com/mongodb/mongo.git +cd mongo +scons diff -Nru mongodb-1.4.4/docs/index.md mongodb-1.6.3/docs/index.md --- mongodb-1.4.4/docs/index.md 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/docs/index.md 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,9 @@ + +MongoDB Server Documentation +============ + +This is just some internal documentation. + +For the full MongoDB docs, please see [mongodb.org](http://www.mongodb.org/) + +* [building](building.html) diff -Nru mongodb-1.4.4/doxygenConfig mongodb-1.6.3/doxygenConfig --- mongodb-1.4.4/doxygenConfig 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/doxygenConfig 2010-09-24 10:02:42.000000000 -0700 @@ -3,8 +3,8 @@ #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = MongoDB -PROJECT_NUMBER = 1.4.4 -OUTPUT_DIRECTORY = docs +PROJECT_NUMBER = 1.6.3 +OUTPUT_DIRECTORY = docs/doxygen CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English BRIEF_MEMBER_DESC = YES @@ -101,7 +101,7 @@ #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- -INPUT = client db/jsobj.h db/json.h +INPUT = client db/jsobj.h db/json.h bson INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.c \ *.cc \ diff -Nru mongodb-1.4.4/gch.py mongodb-1.6.3/gch.py --- mongodb-1.4.4/gch.py 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/gch.py 2010-09-24 10:02:42.000000000 -0700 @@ -37,6 +37,9 @@ def gen_suffix(env, sources): return sources[0].get_suffix() + env['GCHSUFFIX'] +def header_path(node): + path = node.path + return path[:-4] # strip final '.gch' GchShBuilder = SCons.Builder.Builder(action = GchShAction, source_scanner = SCons.Scanner.C.CScanner(), @@ -54,7 +57,7 @@ deps = scanner(source[0], env, path) if env.has_key('Gch') and env['Gch']: - if env['Gch'].path.strip('.gch') in [x.path for x in deps]: + if header_path(env['Gch']) in [x.path for x in deps]: env.Depends(target, env['Gch']) return (target, source) @@ -67,7 +70,7 @@ deps = scanner(source[0], env, path) if env.has_key('GchSh') and env['GchSh']: - if env['GchSh'].path.strip('.gch') in [x.path for x in deps]: + if header_path(env['GchSh']) in [x.path for x in deps]: env.Depends(target, env['GchSh']) return (target, source) diff -Nru mongodb-1.4.4/.gitignore mongodb-1.6.3/.gitignore --- mongodb-1.4.4/.gitignore 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/.gitignore 2010-09-24 10:02:42.000000000 -0700 @@ -2,8 +2,10 @@ .dbshell .sconsign.dblite .sconf_temp +perf.data *~ 
+*.swp *.o *.os *.obj @@ -27,16 +29,22 @@ *.idb *.pdb *.manifest +*.user *.gch +*.*sdf +*.psess *# .#* shell/mongo.cpp shell/mongo-server.cpp -db/Debug -db/oplog* +*/Debug/ +*/*/Debug/ +*/Release/ +*/*/Release/ db/.gdb* db/makefile.local +db/_ReSharper.db config.log settings.py buildinfo.cpp @@ -49,6 +57,7 @@ logs docs/html docs/latex +docs/doxygen 32bit scratch @@ -68,16 +77,19 @@ mongosniff mongobridge mongostat +bsondump *.tgz *.zip *.tar.gz mongodb-* +mongo-cxx-driver-* #libs libmongoclient.* libmongotestfiles.* +libmongoshellfiles.* # examples firstExample diff -Nru mongodb-1.4.4/jstests/apitest_db.js mongodb-1.6.3/jstests/apitest_db.js --- mongodb-1.4.4/jstests/apitest_db.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/apitest_db.js 2010-09-24 10:02:42.000000000 -0700 @@ -2,6 +2,8 @@ * Tests for the db object enhancement */ +assert( "test" == db, "wrong database currently not test" ); + dd = function( x ){ //print( x ); } @@ -34,7 +36,7 @@ db.createCollection("test"); var found = false; db.getCollection( "system.namespaces" ).find().forEach( function(x) { if (x.name == "test.test") found = true; }); -assert(found); +assert(found, "found test.test in system.namespaces"); dd( "e" ); @@ -43,16 +45,16 @@ */ db.setProfilingLevel(0); -assert(db.getProfilingLevel() == 0); +assert(db.getProfilingLevel() == 0, "prof level 0"); db.setProfilingLevel(1); -assert(db.getProfilingLevel() == 1); +assert(db.getProfilingLevel() == 1, "p1"); db.setProfilingLevel(2); -assert(db.getProfilingLevel() == 2); +assert(db.getProfilingLevel() == 2, "p2"); db.setProfilingLevel(0); -assert(db.getProfilingLevel() == 0); +assert(db.getProfilingLevel() == 0, "prof level 0"); dd( "f" ); asserted = false; @@ -64,7 +66,7 @@ asserted = true; assert(e.dbSetProfilingException); } -assert( asserted ); +assert( asserted, "should have asserted" ); dd( "g" ); diff -Nru mongodb-1.4.4/jstests/apply_ops1.js mongodb-1.6.3/jstests/apply_ops1.js --- mongodb-1.4.4/jstests/apply_ops1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/apply_ops1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,51 @@ + +t = db.apply_ops1; +t.drop(); + +assert.eq( 0 , t.find().count() , "A0" ); +db.runCommand( { applyOps : [ { "op" : "i" , "ns" : t.getFullName() , "o" : { _id : 5 , x : 17 } } ] } ) +assert.eq( 1 , t.find().count() , "A1" ); + +o = { _id : 5 , x : 17 } +assert.eq( o , t.findOne() , "A2" ); + +res = db.runCommand( { applyOps : [ + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } , + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } +] } ) + +o.x++; +o.x++; + +assert.eq( 1 , t.find().count() , "A3" ); +assert.eq( o , t.findOne() , "A4" ); + + +res = db.runCommand( { applyOps : + [ + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } , + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } + ] + , + preCondition : [ { ns : t.getFullName() , q : { _id : 5 } , res : { x : 19 } } ] + } ); + +o.x++; +o.x++; + +assert.eq( 1 , t.find().count() , "B1" ); +assert.eq( o , t.findOne() , "B2" ); + + +res = db.runCommand( { applyOps : + [ + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } , + { "op" : "u" , "ns" : t.getFullName() , "o2" : { _id : 5 } , "o" : { $inc : { x : 1 } } } + ] + , + preCondition : [ { ns : t.getFullName() , q : { _id : 5 } , res : { x : 19 } } ] + } ); + +assert.eq( 1 , t.find().count() , "B3" ); 
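The apply_ops1.js test above drives the applyOps command directly: each array entry is an oplog-format operation ("op" of "i" for insert or "u" for update, "ns" the namespace, "o2" the match criteria, "o" the document or modifier), and the optional preCondition list makes the whole batch apply only if its queries match, as the B3/B4 asserts verify for the failing case. Roughly the same call from the C++ driver -- a sketch assuming a connected DBClientConnection c and the BSON_ARRAY builder:

    // One update op in oplog form: match o2, apply the $inc modifier in o.
    BSONObj op = BSON( "op" << "u" << "ns" << "test.apply_ops1"
                       << "o2" << BSON( "_id" << 5 )
                       << "o" << BSON( "$inc" << BSON( "x" << 1 ) ) );
    BSONObj res;
    bool ok = c.runCommand( "test", BSON( "applyOps" << BSON_ARRAY( op ) ), res );
    // A failing preCondition leaves the data untouched, as the asserts above check.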
+assert.eq( o , t.findOne() , "B4" ); + diff -Nru mongodb-1.4.4/jstests/arrayfind2.js mongodb-1.6.3/jstests/arrayfind2.js --- mongodb-1.4.4/jstests/arrayfind2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/arrayfind2.js 2010-09-24 10:02:42.000000000 -0700 @@ -22,14 +22,14 @@ t.ensureIndex( { a : 1 } ); go( "index(a)" ); -assert.eq( [], t.find( { a : { $all : [ { $elemMatch : { x : 3 } } ] } } ).explain().indexBounds ); +assert.eq( {}, t.find( { a : { $all : [ { $elemMatch : { x : 3 } } ] } } ).explain().indexBounds ); t.ensureIndex( { "a.x": 1 } ); -assert.eq( [ [ {"a.x":3},{"a.x":3} ] ], t.find( { a : { $all : [ { $elemMatch : { x : 3 } } ] } } ).explain().indexBounds ); +assert.eq( {"a.x":[[3,3]]}, t.find( { a : { $all : [ { $elemMatch : { x : 3 } } ] } } ).explain().indexBounds ); // only first $elemMatch used to find bounds -assert.eq( [ [ {"a.x":3},{"a.x":3} ] ], t.find( { a : { $all : [ { $elemMatch : { x : 3 } }, { $elemMatch : { y : 5 } } ] } } ).explain().indexBounds ); +assert.eq( {"a.x":[[3,3]]}, t.find( { a : { $all : [ { $elemMatch : { x : 3 } }, { $elemMatch : { y : 5 } } ] } } ).explain().indexBounds ); t.ensureIndex( { "a.x":1,"a.y":-1 } ); -assert.eq( [ [ {"a.x":3,"a.y":1.7976931348623157e+308},{"a.x":3,"a.y":4} ] ], t.find( { a : { $all : [ { $elemMatch : { x : 3, y : { $gt: 4 } } } ] } } ).explain().indexBounds ); +assert.eq( {"a.x":[[3,3]],"a.y":[[1.7976931348623157e+308,4]]}, t.find( { a : { $all : [ { $elemMatch : { x : 3, y : { $gt: 4 } } } ] } } ).explain().indexBounds ); diff -Nru mongodb-1.4.4/jstests/basic3.js mongodb-1.6.3/jstests/basic3.js --- mongodb-1.4.4/jstests/basic3.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/basic3.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,5 @@ -t = db.getCollection( "foo" ); +t = db.getCollection( "foo_basic3" ); t.find( { "a.b" : 1 } ).toArray(); diff -Nru mongodb-1.4.4/jstests/basic9.js mongodb-1.6.3/jstests/basic9.js --- mongodb-1.4.4/jstests/basic9.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/basic9.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,5 @@ -t = db.getCollection( "foo" ); +t = db.getCollection( "foo_basic9" ); t.save( { "foo$bar" : 5 } ); diff -Nru mongodb-1.4.4/jstests/capped3.js mongodb-1.6.3/jstests/capped3.js --- mongodb-1.4.4/jstests/capped3.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/capped3.js 2010-09-24 10:02:42.000000000 -0700 @@ -23,9 +23,10 @@ i = 999; while( c.hasNext() ) { assert.eq( i--, c.next().i, "E" ); -} -print( "i: " + i ); -print( "stats: " + tojson( t2.stats() ) ); +} +//print( "i: " + i ); +var str = tojson( t2.stats() ); +//print( "stats: " + tojson( t2.stats() ) ); assert( i < 990, "F" ); t.drop(); diff -Nru mongodb-1.4.4/jstests/capped4.js mongodb-1.6.3/jstests/capped4.js --- mongodb-1.4.4/jstests/capped4.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/capped4.js 1969-12-31 16:00:00.000000000 -0800 @@ -1,28 +0,0 @@ -t = db.jstests_capped4; -t.drop(); - -db.createCollection( "jstests_capped4", {size:1000,capped:true} ); -t.ensureIndex( { i: 1 } ); -for( i = 0; i < 20; ++i ) { - t.save( { i : i } ); -} -c = t.find().sort( { $natural: -1 } ).limit( 2 ); -c.next(); -c.next(); -d = t.find().sort( { i: -1 } ).limit( 2 ); -d.next(); -d.next(); - -for( i = 20; t.findOne( { i:19 } ); ++i ) { - t.save( { i : i } ); -} -//assert( !t.findOne( { i : 19 } ), "A" ); -assert( !c.hasNext(), "B" ); -assert( !d.hasNext(), "C" ); -assert( t.find().sort( { i : 1 } ).hint( { i : 1 } ).toArray().length > 10, "D" 
); - -assert( t.findOne( { i : i - 1 } ), "E" ); -t.remove( { i : i - 1 } ); -assert( db.getLastError().indexOf( "capped" ) >= 0, "F" ); - -assert( t.validate().valid, "G" ); diff -Nru mongodb-1.4.4/jstests/capped6.js mongodb-1.6.3/jstests/capped6.js --- mongodb-1.4.4/jstests/capped6.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/capped6.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,82 @@ +Random.setRandomSeed(); + +db.capped6.drop(); +db._dbCommand( { create: "capped6", capped: true, size: 1000, $nExtents: 11, autoIndexId: false } ); +tzz = db.capped6; + +function debug( x ) { +// print( x ); +} + +function checkOrder( i ) { + res = tzz.find().sort( { $natural: -1 } ); + assert( res.hasNext(), "A" ); + var j = i; + while( res.hasNext() ) { + try { + assert.eq( val[ j-- ].a, res.next().a, "B" ); + } catch( e ) { + debug( "capped6 err " + j ); + throw e; + } + } + res = tzz.find().sort( { $natural: 1 } ); + assert( res.hasNext(), "C" ); + while( res.hasNext() ) + assert.eq( val[ ++j ].a, res.next().a, "D" ); + assert.eq( j, i, "E" ); +} + +var val = new Array( 500 ); +var c = ""; +for( i = 0; i < 500; ++i, c += "-" ) { + val[ i ] = { a: c }; +} + +var oldMax = Random.randInt( 500 ); +var max = 0; + +function doTest() { + for( var i = max; i < oldMax; ++i ) { + tzz.save( val[ i ] ); + } + max = oldMax; + count = tzz.count(); + + var min = 1; + if ( Random.rand() > 0.3 ) { + min = Random.randInt( count ) + 1; + } + + while( count > min ) { + var n = Random.randInt( count - min - 1 ); // 0 <= x <= count - min - 1 + var inc = Random.rand() > 0.5; + debug( count + " " + n + " " + inc ); + assert.commandWorked( db.runCommand( { captrunc:"capped6", n:n, inc:inc } ) ); + if ( inc ) { + n += 1; + } + count -= n; + max -= n; + checkOrder( max - 1 ); + } +} + +for( var i = 0; i < 10; ++i ) { + doTest(); +} + +// reverse order of values +var val = new Array( 500 ); + +var c = ""; +for( i = 499; i >= 0; --i, c += "-" ) { + val[ i ] = { a: c }; +} +db.capped6.drop(); +db._dbCommand( { create: "capped6", capped: true, size: 1000, $nExtents: 11, autoIndexId: false } ); +tzz = db.capped6; + +for( var i = 0; i < 10; ++i ) { + doTest(); +} diff -Nru mongodb-1.4.4/jstests/capped7.js mongodb-1.6.3/jstests/capped7.js --- mongodb-1.4.4/jstests/capped7.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/capped7.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,72 @@ +Random.setRandomSeed(); + +db.capped7.drop(); +db._dbCommand( { create: "capped7", capped: true, size: 1000, $nExtents: 11, autoIndexId: false } ); +tzz = db.capped7; + +var ten = new Array( 11 ).toString().replace( /,/g, "-" ); + +count = 0; + +function insertUntilFull() { +count = tzz.count(); + var j = 0; +while( 1 ) { + tzz.save( {i:ten,j:j++} ); + var newCount = tzz.count(); + if ( count == newCount ) { + break; + } + count = newCount; +} +} + +insertUntilFull(); + +oldCount = count; + +assert.eq.automsg( "11", "tzz.stats().numExtents" ); +var oldSize = tzz.stats().storageSize; + +assert.commandWorked( db._dbCommand( { emptycapped: "capped7" } ) ); + +assert.eq.automsg( "11", "tzz.stats().numExtents" ); +assert.eq.automsg( "oldSize", "tzz.stats().storageSize" ); + +assert.eq.automsg( "0", "tzz.find().itcount()" ); +assert.eq.automsg( "0", "tzz.count()" ); + +insertUntilFull(); + +assert.eq.automsg( "oldCount", "count" ); +assert.eq.automsg( "oldCount", "tzz.find().itcount()" ); +assert.eq.automsg( "oldCount", "tzz.count()" ); + +assert.eq.automsg( "11", "tzz.stats().numExtents" ); +var oldSize = 
tzz.stats().storageSize; + +assert.commandWorked( db._dbCommand( { emptycapped: "capped7" } ) ); + +assert.eq.automsg( "11", "tzz.stats().numExtents" ); +assert.eq.automsg( "oldSize", "tzz.stats().storageSize" ); + +var total = Random.randInt( 2000 ); +for( var j = 1; j <= total; ++j ) { + tzz.save( {i:ten,j:j} ); + if ( Random.rand() > 0.95 ) { + assert.automsg( "j >= tzz.count()" ); + assert.eq.automsg( "tzz.count()", "tzz.find().itcount()" ); + var c = tzz.find().sort( {$natural:-1} ); + var k = j; + assert.automsg( "c.hasNext()" ); + while( c.hasNext() ) { + assert.eq.automsg( "c.next().j", "k--" ); + } + var c = tzz.find().sort( {$natural:1} ); + assert.automsg( "c.hasNext()" ); + while( c.hasNext() ) { + assert.eq.automsg( "c.next().j", "++k" ); + } + assert.eq.automsg( "j", "k" ); + } +} \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/clone/clonecollection.js mongodb-1.6.3/jstests/clone/clonecollection.js --- mongodb-1.4.4/jstests/clone/clonecollection.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/clone/clonecollection.js 2010-09-24 10:02:42.000000000 -0700 @@ -2,65 +2,6 @@ var baseName = "jstests_clonecollection"; -parallel = function() { - return t.parallelStatus; -} - -resetParallel = function() { - parallel().drop(); -} - -doParallel = function( work ) { - resetParallel(); - startMongoProgramNoConnect( "mongo", "--port", ports[ 1 ], "--eval", work + "; db.parallelStatus.save( {done:1} );", baseName ); -} - -doneParallel = function() { - return !!parallel().findOne(); -} - -waitParallel = function() { - assert.soon( function() { return doneParallel(); }, "parallel did not finish in time", 300000, 1000 ); -} - -cloneNo = -1; -startstartclone = function( spec ) { - spec = spec || ""; - cloneNo++; - doParallel( "z = db.runCommand( {startCloneCollection:\"jstests_clonecollection.a\", from:\"localhost:" + ports[ 0 ] + "\"" + spec + " } ); print( \"clone_clone_clone_commandResult::" + cloneNo + "::\" + tojson( z , '' , true ) + \":::::\" );" ); -} - -finishstartclone = function() { - waitParallel(); - // even after parallel shell finished, must wait for finishToken line to appear in log - assert.soon( function() { - raw = rawMongoProgramOutput().replace( /[\r\n]/gm , " " ) - ret = raw.match( new RegExp( "clone_clone_clone_commandResult::" + cloneNo + "::(.*):::::" ) ); - if ( ret == null ) { - return false; - } - ret = ret[ 1 ]; - return true; - } ); - - eval( "ret = " + ret ); - - assert.commandWorked( ret ); - return ret; -} - -dofinishclonecmd = function( ret ) { - finishToken = ret.finishToken; - // Round-tripping through JS can corrupt the cursor ids we store as BSON - // Date elements. Date( 0 ) will correspond to a cursorId value of 0, which - // makes the db start scanning from the beginning of the collection. 
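// (Background for the cursorId line below: a BSON Date is stored as a 64-bit
// millisecond count, the same 8 bytes a cursor id occupies, which is why
// new Date( 0 ) round-trips to a cursor id of 0 and triggers the rescan
// described above.)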
- finishToken.cursorId = new Date( 0 ); - return t.runCommand( {finishCloneCollection:finishToken} ); -} - -finishclone = function( ret ) { - assert.commandWorked( dofinishclonecmd( ret ) ); -} ports = allocatePorts( 2 ); @@ -91,7 +32,9 @@ } assert.eq( 2, t.system.indexes.find().count(), "expected index missing" ); // Verify index works -assert.eq( 50, t.a.find( { i: 50 } ).hint( { i: 1 } ).explain().indexBounds[0][0].i , "verify 1" ); +x = t.a.find( { i: 50 } ).hint( { i: 1 } ).explain() +printjson( x ) +assert.eq( 50, x.indexBounds.i[0][0] , "verify 1" ); assert.eq( 1, t.a.find( { i: 50 } ).hint( { i: 1 } ).toArray().length, "match length did not match expected" ); // Check that capped-ness is preserved on clone @@ -103,91 +46,4 @@ assert.commandWorked( t.cloneCollection( "localhost:" + ports[ 0 ], "a" ) ); assert( t.a.isCapped(), "cloned collection not capped" ); -// Now test insert + delete + update during clone -f.a.drop(); -t.a.drop(); - -for( i = 0; i < 100000; ++i ) { - f.a.save( { i: i } ); -} -assert.eq( 100000, f.a.count() ); - -startstartclone( ", query:{i:{$gte:0}}" ); - -sleep( 200 ); -f.a.save( { i: 200000 } ); -f.a.save( { i: -1 } ); -f.a.remove( { i: 0 } ); -f.a.update( { i: 99998 }, { i: 99998, x: "y" } ); -assert.eq( 100001, f.a.count() , "D0" ); -ret = finishstartclone(); -finishclone( ret ); - -assert.eq( 100000, t.a.find().count() , "D1" ); -assert.eq( 1, t.a.find( { i: 200000 } ).count() , "D2" ); -assert.eq( 0, t.a.find( { i: -1 } ).count() , "D3" ); -assert.eq( 0, t.a.find( { i: 0 } ).count() , "D4" ); -assert.eq( 1, t.a.find( { i: 99998, x: "y" } ).count() , "D5" ); - - -// Now test oplog running out of space -- specify small size clone oplog for test. -f.a.drop(); -t.a.drop(); - -for( i = 0; i < 200000; ++i ) { - f.a.save( { i: i } ); -} -assert.eq( 200000, f.a.count() , "E1" ); - -startstartclone( ", logSizeMb:1" ); -ret = finishstartclone(); - -for( i = 200000; i < 250000; ++i ) { - f.a.save( { i: i } ); -} - -assert.eq( 250000, f.a.count() , "F0" ); - -assert.commandFailed( dofinishclonecmd( ret ) ); - -// Make sure the same works with standard size op log. -f.a.drop(); -t.a.drop(); - -for( i = 0; i < 200000; ++i ) { - f.a.save( { i: i } ); -} -assert.eq( 200000, f.a.count() , "F1" ); - -startstartclone(); -ret = finishstartclone(); - -for( i = 200000; i < 250000; ++i ) { - f.a.save( { i: i } ); -} -assert.eq( 250000, f.a.count() , "F2" ); - -finishclone( ret ); -assert.eq( 250000, t.a.find().count() , "F3" ); - -// Test startCloneCollection and finishCloneCollection commands. -f.a.drop(); -t.a.drop(); - -for( i = 0; i < 100000; ++i ) { - f.a.save( { i: i } ); -} -assert.eq( 100000, f.a.count() , "G1" ); - -startstartclone(); - -sleep( 200 ); -f.a.save( { i: -1 } ); - -ret = finishstartclone(); -assert.eq( 100001, t.a.find().count() , "G2" ); -f.a.save( { i: -2 } ); -assert.eq( 100002, f.a.find().count() , "G3" ); -finishclone( ret ); -assert.eq( 100002, t.a.find().count() , "G4" ); diff -Nru mongodb-1.4.4/jstests/conc_update.js mongodb-1.6.3/jstests/conc_update.js --- mongodb-1.4.4/jstests/conc_update.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/conc_update.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,45 @@ +// db = db.getSisterDB("concurrency") +// db.dropDatabase(); +// +// NRECORDS=10*1024*1024 // this needs to be relatively big so that +// // the update() will take a while. 
+// +// print("loading data (will take a while; progress msg every 1024*1024 documents)") +// for (i=0; i<(10*1024*1024); i++) { +// db.conc.insert({x:i}) +// if ((i%(1024*1024))==0) +// print("loaded " + i/(1024*1024) + " mibi-records") +// } +// +// print("making an index (will take a while)") +// db.conc.ensureIndex({x:1}) +// +// var c1=db.conc.count({x:{$lt:NRECORDS}}) +// // this is just a flag that the child will toggle when it's done. +// db.concflag.update({}, {inprog:true}, true) +// +// updater=startParallelShell("db=db.getSisterDB('concurrency');\ +// db.conc.update({}, {$inc:{x: "+NRECORDS+"}}, false, true);\ +// print(db.getLastError());\ +// db.concflag.update({},{inprog:false})"); +// +// querycount=0; +// decrements=0; +// misses=0 +// while (1) { +// if (db.concflag.findOne().inprog) { +// c2=db.conc.count({x:{$lt:10*1024*1024}}) +// print(c2) +// querycount++; +// if (c2 0; } ).sort( { _id : -1 } ).limit(n).itcount() + end = new Date() + + join() + + print( "num: " + num + " time:" + ( end.getTime() - start.getTime() ) ) + assert.eq( 0 , t.count() , "after remove" ) + if ( n == num ) + print( "warning: shouldn't have counted all n: " + n + " num: " + num ); +} + +run( 1500 ) +run( 5000 ) + +run( 1500 , true ) +run( 5000 , true ) + + diff -Nru mongodb-1.4.4/jstests/datasize2.js mongodb-1.6.3/jstests/datasize2.js --- mongodb-1.4.4/jstests/datasize2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/datasize2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,27 @@ + +t = db.datasize2 +t.drop(); + +N = 1000 +for ( i=0; i 0 , "listDatabases 1" ); +assert( res.databases && res.databases.length > 0 , "listDatabases 1 " + tojson(res) ); + +x = db._adminCommand( "ismaster" ); +assert( x.ismaster , "ismaster failed: " + tojson( x ) ) + +before = db.runCommand( "serverStatus" ) +sleep( 5000 ) +after = db.runCommand( "serverStatus" ) +assert.lt( 3 , after.uptimeEstimate , "up1" ) +assert.gt( after.uptimeEstimate , before.uptimeEstimate , "up2" ) // TODO: add more tests here diff -Nru mongodb-1.4.4/jstests/dbcase.js mongodb-1.6.3/jstests/dbcase.js --- mongodb-1.4.4/jstests/dbcase.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/dbcase.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,23 @@ + +a = db.getSisterDB( "dbcasetest_dbnamea" ) +b = db.getSisterDB( "dbcasetest_dbnameA" ) + +a.dropDatabase(); +b.dropDatabase(); + +a.foo.save( { x : 1 } ) +z = db.getLastErrorObj(); +assert.eq( 0 , z.code || 0 , "A : " + tojson(z) ) + +b.foo.save( { x : 1 } ) +z = db.getLastErrorObj(); +assert.eq( 13297 , z.code || 0 , "B : " + tojson(z) ) + +print( db.getMongo().getDBNames() ) + +a.dropDatabase(); +b.dropDatabase(); + +print( db.getMongo().getDBNames() ) + + diff -Nru mongodb-1.4.4/jstests/dbhash.js mongodb-1.6.3/jstests/dbhash.js --- mongodb-1.4.4/jstests/dbhash.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/dbhash.js 2010-09-24 10:02:42.000000000 -0700 @@ -5,6 +5,15 @@ a.drop(); b.drop(); +// debug SERVER-761 +db.getCollectionNames().forEach( function( x ) { + v = db[ x ].validate(); + if ( !v.valid ) { + print( x ); + printjson( v ); + } + } ); + function gh( coll , mydb ){ if ( ! 
mydb ) mydb = db; var x = mydb.runCommand( "dbhash" ).collections[coll.getName()]; diff -Nru mongodb-1.4.4/jstests/delx.js mongodb-1.6.3/jstests/delx.js --- mongodb-1.4.4/jstests/delx.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/delx.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,31 @@ + +a = db.getSisterDB("delxa" ) +b = db.getSisterDB("delxb" ) + +function setup( mydb ){ + mydb.dropDatabase(); + for ( i=0; i<100; i++ ){ + mydb.foo.insert( { _id : i } ); + } + mydb.getLastError(); +} + +setup( a ); +setup( b ); + +assert.eq( 100 , a.foo.find().itcount() , "A1" ) +assert.eq( 100 , b.foo.find().itcount() , "A2" ) + +x = a.foo.find().sort( { _id : 1 } ).batchSize( 60 ) +y = b.foo.find().sort( { _id : 1 } ).batchSize( 60 ) + +x.next(); +y.next(); + +a.foo.remove( { _id : { $gt : 50 } } ); + +assert.eq( 51 , a.foo.find().itcount() , "B1" ) +assert.eq( 100 , b.foo.find().itcount() , "B2" ) + +assert.eq( 59 , x.itcount() , "C1" ) +assert.eq( 99 , y.itcount() , "C2" ); // this was asserting because ClientCursor byLoc doesn't take db into consideration diff -Nru mongodb-1.4.4/jstests/disk/directoryperdb.js mongodb-1.6.3/jstests/disk/directoryperdb.js --- mongodb-1.4.4/jstests/disk/directoryperdb.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/disk/directoryperdb.js 2010-09-24 10:02:42.000000000 -0700 @@ -6,7 +6,7 @@ var m = startMongod( "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); db = m.getDB( baseName ); db[ baseName ].save( {} ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "A : " + tojson( db[baseName].find().toArray() ) ); checkDir = function( dir ) { db.runCommand( {fsync:1} ); @@ -22,7 +22,7 @@ files = listFiles( dir + baseName ); for( f in files ) { - assert( new RegExp( baseName + "/" + baseName + "." ).test( files[ f ].name ) ); + assert( new RegExp( baseName + "/" + baseName + "." 
).test( files[ f ].name ) , "B dir:" + dir + " f: " + f ); } } checkDir( dbpath ); @@ -40,7 +40,7 @@ } } checkDir( backupDir ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "C" ); // tool test stopMongod( port ); @@ -53,7 +53,7 @@ m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); db = m.getDB( baseName ); checkDir( dbpath ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "C" ); assert( m.getDBs().totalSize > 0, "bad size calc" ); // drop db test diff -Nru mongodb-1.4.4/jstests/disk/diskfull.js mongodb-1.6.3/jstests/disk/diskfull.js --- mongodb-1.4.4/jstests/disk/diskfull.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/disk/diskfull.js 2010-09-24 10:02:42.000000000 -0700 @@ -14,9 +14,10 @@ if ( doIt ) { port = allocatePorts( 1 )[ 0 ]; m = startMongoProgram( "mongod", "--port", port, "--dbpath", "/data/db/diskfulltest", "--nohttpinterface", "--bind_ip", "127.0.0.1" ); - m.getDB( "diskfulltest" ).getCollection( "diskfulltest" ).save( { a: 6 } ); + c = m.getDB( "diskfulltest" ).getCollection( "diskfulltest" ) + c.save( { a: 6 } ); assert.soon( function() { return rawMongoProgramOutput().match( /file allocation failure/ ); }, "didn't see 'file allocation failure'" ); - assert.soon( function() { return rawMongoProgramOutput().match( /Caught Assertion in insert , continuing/ ); }, "didn't see 'Caught Assertion...'" ); + assert.isnull( c.findOne() , "shouldn't exist" ); sleep( 3000 ); m2 = new Mongo( m.host ); printjson( m2.getDBs() ); diff -Nru mongodb-1.4.4/jstests/disk/repair2.js mongodb-1.6.3/jstests/disk/repair2.js --- mongodb-1.4.4/jstests/disk/repair2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/disk/repair2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,47 @@ +// repair with --directoryperdb + +var baseName = "jstests_disk_repair2"; + +port = allocatePorts( 1 )[ 0 ]; +dbpath = "/data/db/" + baseName + "/"; +repairpath = dbpath + "repairDir/" + +resetDbpath( dbpath ); +resetDbpath( repairpath ); + +m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +db[ baseName ].save( {} ); +assert.commandWorked( db.runCommand( {repairDatabase:1, backupOriginalFiles:true} ) ); +function check() { + files = listFiles( dbpath ); + for( f in files ) { + assert( ! 
new RegExp( "^" + dbpath + "backup_" ).test( files[ f ].name ), "backup dir in dbpath" ); + } + + assert.eq.automsg( "1", "db[ baseName ].count()" ); +} +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +assert.commandWorked( db.runCommand( {repairDatabase:1} ) ); +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +rc = runMongoProgram( "mongod", "--repair", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +assert.eq.automsg( "0", "rc" ); +m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +rc = runMongoProgram( "mongod", "--repair", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +assert.eq.automsg( "0", "rc" ); +m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +check(); diff -Nru mongodb-1.4.4/jstests/disk/repair3.js mongodb-1.6.3/jstests/disk/repair3.js --- mongodb-1.4.4/jstests/disk/repair3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/disk/repair3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,52 @@ +// test --repairpath on aother partition + +var baseName = "jstests_disk_repair3"; +var repairbase = "/data/db/repairpartitiontest" +var repairpath = repairbase + "/dir" + +doIt = false; +files = listFiles( "/data/db" ); +for ( i in files ) { + if ( files[ i ].name == repairbase ) { + doIt = true; + } +} + +if ( !doIt ) { + print( "path " + repairpath + " missing, skipping repair3 test" ); + doIt = false; +} + +if ( doIt ) { + + port = allocatePorts( 1 )[ 0 ]; + dbpath = "/data/db/" + baseName + "/"; + + resetDbpath( dbpath ); + resetDbpath( repairpath ); + + m = startMongoProgram( "mongod", "--nssize", "8", "--noprealloc", "--smallfiles", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); + db = m.getDB( baseName ); + db[ baseName ].save( {} ); + assert.commandWorked( db.runCommand( {repairDatabase:1, backupOriginalFiles:false} ) ); + function check() { + files = listFiles( dbpath ); + for( f in files ) { + assert( ! 
new RegExp( "^" + dbpath + "backup_" ).test( files[ f ].name ), "backup dir in dbpath" ); + } + + assert.eq.automsg( "1", "db[ baseName ].count()" ); + } + + check(); + stopMongod( port ); + + resetDbpath( repairpath ); + rc = runMongoProgram( "mongod", "--nssize", "8", "--noprealloc", "--smallfiles", "--repair", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); + assert.eq.automsg( "0", "rc" ); + m = startMongoProgram( "mongod", "--nssize", "8", "--noprealloc", "--smallfiles", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); + db = m.getDB( baseName ); + check(); + stopMongod( port ); + +} \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/disk/repair4.js mongodb-1.6.3/jstests/disk/repair4.js --- mongodb-1.4.4/jstests/disk/repair4.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/disk/repair4.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,44 @@ +// test that disk space check happens on --repairpath partition + +var baseName = "jstests_disk_repair4"; +var smallbase = "/data/db/repairpartitiontest" +var smallpath = smallbase + "/dir" + +doIt = false; +files = listFiles( "/data/db" ); +for ( i in files ) { + if ( files[ i ].name == smallbase ) { + doIt = true; + } +} + +if ( !doIt ) { + print( "path " + smallpath + " missing, skipping repair4 test" ); + doIt = false; +} + +if ( doIt ) { + + port = allocatePorts( 1 )[ 0 ]; + repairpath = "/data/db/" + baseName + "/"; + + resetDbpath( smallpath ); + resetDbpath( repairpath ); + + m = startMongoProgram( "mongod", "--nssize", "8", "--noprealloc", "--smallfiles", "--port", port, "--dbpath", smallpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); + db = m.getDB( baseName ); + db[ baseName ].save( {} ); + assert.commandWorked( db.runCommand( {repairDatabase:1, backupOriginalFiles:true} ) ); + function check() { + files = listFiles( smallpath ); + for( f in files ) { + assert( ! new RegExp( "^" + smallpath + "backup_" ).test( files[ f ].name ), "backup dir in dbpath" ); + } + + assert.eq.automsg( "1", "db[ baseName ].count()" ); + } + + check(); + stopMongod( port ); + +} \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/disk/repair.js mongodb-1.6.3/jstests/disk/repair.js --- mongodb-1.4.4/jstests/disk/repair.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/disk/repair.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,3 +1,5 @@ +// check --repairpath and --repair + var baseName = "jstests_disk_repair"; port = allocatePorts( 1 )[ 0 ]; @@ -10,9 +12,36 @@ m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); db = m.getDB( baseName ); db[ baseName ].save( {} ); -db.runCommand( {repairDatabase:1, backupOriginalFiles:true} ); +assert.commandWorked( db.runCommand( {repairDatabase:1, backupOriginalFiles:true} ) ); +function check() { + files = listFiles( dbpath ); + for( f in files ) { + assert( ! new RegExp( "^" + dbpath + "backup_" ).test( files[ f ].name ), "backup dir in dbpath" ); + } -files = listFiles( dbpath ); -for( f in files ) { - assert( ! 
new RegExp( "^" + dbpath + "backup_" ).test( files[ f ].name ), "backup dir in dbpath" ); + assert.eq.automsg( "1", "db[ baseName ].count()" ); } +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +assert.commandWorked( db.runCommand( {repairDatabase:1} ) ); +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +rc = runMongoProgram( "mongod", "--repair", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +assert.eq.automsg( "0", "rc" ); +m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbpath, "--repairpath", repairpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +check(); +stopMongod( port ); + +resetDbpath( repairpath ); +rc = runMongoProgram( "mongod", "--repair", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +assert.eq.automsg( "0", "rc" ); +m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +check(); diff -Nru mongodb-1.4.4/jstests/distinct_array1.js mongodb-1.6.3/jstests/distinct_array1.js --- mongodb-1.4.4/jstests/distinct_array1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/distinct_array1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,24 @@ +t = db.distinct_array1; +t.drop(); + +t.save( { a : [1,2,3] } ) +t.save( { a : [2,3,4] } ) +t.save( { a : [3,4,5] } ) +t.save( { a : 9 } ) + + +res = t.distinct( "a" ); +assert.eq( "1,2,3,4,5,9" , res.toString() , "A1" ); + + +//t.drop(); + +t.save( { a : [{b:"a"}, {b:"d"}] , c : 12 } ); +t.save( { a : [{b:"b"}, {b:"d"}] , c : 12 } ); +t.save( { a : [{b:"c"}, {b:"e"}] , c : 12 } ); +t.save( { a : [{b:"c"}, {b:"f"}] , c : 12 } ); +t.save( { a : [] , c : 12 } ); +t.save( { a : { b : "z"} , c : 12 } ); + +res = t.distinct( "a.b" ); +assert.eq( "a,b,c,d,e,f,z" , res.toString() , "B1" ); diff -Nru mongodb-1.4.4/jstests/distinct_speed1.js mongodb-1.6.3/jstests/distinct_speed1.js --- mongodb-1.4.4/jstests/distinct_speed1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/distinct_speed1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,26 @@ + +t = db.distinct_speed1; + +t.drop(); +for ( var i=0; i<10000; i++ ){ + t.save( { x : i % 10 } ); +} + +assert.eq( 10 , t.distinct("x").length , "A1" ); + +function fast(){ + t.find().explain().millis; +} + +function slow(){ + t.distinct("x"); +} + +for ( i=0; i<3; i++ ){ + print( "it: " + Date.timeFunc( fast ) ); + print( "di: " + Date.timeFunc( slow ) ); +} + + +t.ensureIndex( { x : 1 } ); +t.distinct( "x" , { x : 5 } ) diff -Nru mongodb-1.4.4/jstests/drop.js mongodb-1.6.3/jstests/drop.js --- mongodb-1.4.4/jstests/drop.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/drop.js 2010-09-24 10:02:42.000000000 -0700 @@ -13,7 +13,7 @@ f.resetIndexCache(); f.ensureIndex( {a:1} ); assert.eq( 2, db.system.indexes.find( {ns:"test.jstests_drop"} ).count() , "E" ); -assert.commandWorked( db.runCommand( {deleteIndexes:"jstests_drop",index:"*"} ) ); +assert.commandWorked( db.runCommand( {deleteIndexes:"jstests_drop",index:"*"} ), "delete indexes A" ); assert.eq( 1, db.system.indexes.find( {ns:"test.jstests_drop"} ).count() , "G" ); // make sure we can still use it diff -Nru mongodb-1.4.4/jstests/evalb.js mongodb-1.6.3/jstests/evalb.js --- mongodb-1.4.4/jstests/evalb.js 
2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/evalb.js 2010-09-24 10:02:42.000000000 -0700 @@ -10,5 +10,8 @@ assert.eq( 3, db.eval( function(){ return db.evalb.findOne().x; } ) , "B" ); +o = db.system.profile.find().sort( { $natural : -1 } ).limit(1).next(); +assert( o.info.indexOf( "findOne().x" ) > 0 , "C : " + tojson( o ) ) + db.setProfilingLevel( 0 ); diff -Nru mongodb-1.4.4/jstests/evalc.js mongodb-1.6.3/jstests/evalc.js --- mongodb-1.4.4/jstests/evalc.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/evalc.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,32 @@ +t = db.jstests_evalc; +t.drop(); + +for( i = 0; i < 10; ++i ) { + t.save( {i:i} ); +} + +// SERVER-1610 + +function op() { + uri = db.runCommand( "whatsmyuri" ).you; + printjson( uri ); + p = db.currentOp().inprog; + for ( var i in p ) { + var o = p[ i ]; + if ( o.client == uri ) { + print( "found it" ); + return o.opid; + } + } + return -1; +} + +s = startParallelShell( "print( 'starting forked:' + Date() ); for ( i=0; i<500000; i++ ){ db.currentOp(); } print( 'ending forked:' + Date() ); " ) + +print( "starting eval: " + Date() ) +for ( i=0; i<20000; i++ ){ + db.eval( "db.jstests_evalc.count( {i:10} );" ); +} +print( "end eval: " + Date() ) + +s(); diff -Nru mongodb-1.4.4/jstests/explain2.js mongodb-1.6.3/jstests/explain2.js --- mongodb-1.4.4/jstests/explain2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/explain2.js 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ go( q , 6 , 7 , 6 ); q.b = 5 -go( q , 1 , 6 , 1 ); +go( q , 1 , 1 , 1 ); delete q.b q.c = 5 diff -Nru mongodb-1.4.4/jstests/_fail.js mongodb-1.6.3/jstests/_fail.js --- mongodb-1.4.4/jstests/_fail.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/_fail.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,4 @@ +// For testing the test runner. 
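// (The assert below is meant to fail: a correctly behaving runner reports the
// failure and never reaches the print on the last line.)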
+assert.eq(1, 2, "fail1") + +print("you should not see this") \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/find_and_modify2.js mongodb-1.6.3/jstests/find_and_modify2.js --- mongodb-1.4.4/jstests/find_and_modify2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/find_and_modify2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,10 @@ +t = db.find_and_modify2; +t.drop(); + +t.insert({_id:1, i:0, j:0}); + +out = t.findAndModify({update: {$inc: {i:1}}, 'new': true, fields: {i:1}}); +assert.eq(out, {_id:1, i:1}); + +out = t.findAndModify({update: {$inc: {i:1}}, fields: {i:0}}); +assert.eq(out, {_id:1, j:0}); diff -Nru mongodb-1.4.4/jstests/find_and_modify3.js mongodb-1.6.3/jstests/find_and_modify3.js --- mongodb-1.4.4/jstests/find_and_modify3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/find_and_modify3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,21 @@ +t = db.find_and_modify3; +t.drop(); + +t.insert({_id:0, other:0, comments:[{i:0, j:0}, {i:1, j:1}]}); +t.insert({_id:1, other:1, comments:[{i:0, j:0}, {i:1, j:1}]}); // this is the only one that gets modded +t.insert({_id:2, other:2, comments:[{i:0, j:0}, {i:1, j:1}]}); + +orig0 = t.findOne({_id:0}) +orig2 = t.findOne({_id:2}) + +out = t.findAndModify({query: {_id:1, 'comments.i':0}, update: {$set: {'comments.$.j':2}}, 'new': true}); +assert.eq(out.comments[0], {i:0, j:2}); +assert.eq(out.comments[1], {i:1, j:1}); +assert.eq(t.findOne({_id:0}), orig0); +assert.eq(t.findOne({_id:2}), orig2); + +out = t.findAndModify({query: {other:1, 'comments.i':1}, update: {$set: {'comments.$.j':3}}, 'new': true}); +assert.eq(out.comments[0], {i:0, j:2}); +assert.eq(out.comments[1], {i:1, j:3}); +assert.eq(t.findOne({_id:0}), orig0); +assert.eq(t.findOne({_id:2}), orig2); diff -Nru mongodb-1.4.4/jstests/find_and_modify4.js mongodb-1.6.3/jstests/find_and_modify4.js --- mongodb-1.4.4/jstests/find_and_modify4.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/find_and_modify4.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,55 @@ +t = db.find_and_modify4; +t.drop(); + +// this is the best way to build auto-increment +function getNextVal(counterName){ + var ret = t.findAndModify({ + query: {_id: counterName}, + update: {$inc: {val: 1}}, + upsert: true, + 'new': true, + }); + return ret.val; +} + +assert.eq(getNextVal("a"), 1); +assert.eq(getNextVal("a"), 2); +assert.eq(getNextVal("a"), 3); +assert.eq(getNextVal("z"), 1); +assert.eq(getNextVal("z"), 2); +assert.eq(getNextVal("a"), 4); + +t.drop(); + +function helper(upsert){ + return t.findAndModify({ + query: {_id: "asdf"}, + update: {$inc: {val: 1}}, + upsert: upsert, + 'new': false // the default + }); +} + +// upsert:false so nothing there before and after +assert.eq(helper(false), null); +assert.eq(t.count(), 0); + +// upsert:false so nothing there before; something there after +assert.eq(helper(true), {}); +assert.eq(t.count(), 1); +assert.eq(helper(true), {_id: 'asdf', val: 1}); +assert.eq(helper(false), {_id: 'asdf', val: 2}); // upsert only matters when obj doesn't exist +assert.eq(helper(true), {_id: 'asdf', val: 3}); + + +// _id created if not specified +var out = t.findAndModify({ + query: {a:1}, + update: {$set: {b: 2}}, + upsert: true, + 'new': true + }); +assert.neq(out._id, undefined); +assert.eq(out.a, 1); +assert.eq(out.b, 2); + diff -Nru mongodb-1.4.4/jstests/find_and_modify.js mongodb-1.6.3/jstests/find_and_modify.js --- mongodb-1.4.4/jstests/find_and_modify.js 2010-06-30 00:03:29.000000000 -0700 +++ 
mongodb-1.6.3/jstests/find_and_modify.js 2010-09-24 10:02:42.000000000 -0700 @@ -33,6 +33,6 @@ out = t.findAndModify({sort:{priority:1}, remove:1}); assert.eq(out.priority, 2); -// return empty obj if no matches (drivers may handle this differently) +// return null (was {} before 1.5.4) if no matches (drivers may handle this differently) out = t.findAndModify({query:{no_such_field:1}, remove:1}); -assert.eq(out, {}); +assert.eq(out, null); diff -Nru mongodb-1.4.4/jstests/fm4.js mongodb-1.6.3/jstests/fm4.js --- mongodb-1.4.4/jstests/fm4.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/fm4.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,16 @@ +t = db.fm4 +t.drop(); + +t.insert({_id:1, a:1, b:1}); + +assert.eq( t.findOne({}, {_id:1}), {_id:1}, 1) +assert.eq( t.findOne({}, {_id:0}), {a:1, b:1}, 2) + +assert.eq( t.findOne({}, {_id:1, a:1}), {_id:1, a:1}, 3) +assert.eq( t.findOne({}, {_id:0, a:1}), {a:1}, 4) + +assert.eq( t.findOne({}, {_id:0, a:0}), {b:1}, 6) +assert.eq( t.findOne({}, { a:0}), {_id:1, b:1}, 5) + +// not sure if we want to support this since it is the same as above +//assert.eq( t.findOne({}, {_id:1, a:0}), {_id:1, b:1}, 5) diff -Nru mongodb-1.4.4/jstests/geo2.js mongodb-1.6.3/jstests/geo2.js --- mongodb-1.4.4/jstests/geo2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/geo2.js 2010-09-24 10:02:42.000000000 -0700 @@ -43,6 +43,7 @@ assert.lt( 3 , a( t.find( { loc : { $near : [ 50 , 50 ] } } ).limit(50) ) , "C1" ) assert.gt( 3 , a( t.find( { loc : { $near : [ 50 , 50 , 3 ] } } ).limit(50) ) , "C2" ) +assert.gt( 3 , a( t.find( { loc : { $near : [ 50 , 50 ] , $maxDistance : 3 } } ).limit(50) ) , "C3" ) diff -Nru mongodb-1.4.4/jstests/geo3.js mongodb-1.6.3/jstests/geo3.js --- mongodb-1.4.4/jstests/geo3.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/geo3.js 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ slow = db.runCommand( { geoNear : t.getName() , near : [ 50 , 50 ] , num : 10 , start : "11" } ); -//printjson( slow.stats ); +printjson( slow.stats ); assert.lt( fast.stats.nscanned * 10 , slow.stats.nscanned , "A1" ); assert.lt( fast.stats.objectsLoaded , slow.stats.objectsLoaded , "A2" ); diff -Nru mongodb-1.4.4/jstests/geo_box3.js mongodb-1.6.3/jstests/geo_box3.js --- mongodb-1.4.4/jstests/geo_box3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_box3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,36 @@ +// How to construct a test to stress the flaw in SERVER-994: +// construct an index, think up a bounding box inside the index that +// doesn't include the center of the index, and put a point inside the +// bounding box. + +// This is the bug reported in SERVER-994. +t=db.geo_box3; +t.drop(); +t.insert({ point : { x : -15000000, y : 10000000 } }); +t.ensureIndex( { point : "2d" } , { min : -21000000 , max : 21000000 } ); +var c=t.find({point: {"$within": {"$box": [[-20000000, 7000000], [0, 15000000]]} } }); +assert.eq(1, c.count(), "A1"); + +// Same thing, modulo 1000000. +t=db.geo_box3; +t.drop(); +t.insert({ point : { x : -15, y : 10 } }); +t.ensureIndex( { point : "2d" } , { min : -21 , max : 21 } ); +var c=t.find({point: {"$within": {"$box": [[-20, 7], [0, 15]]} } }); +assert.eq(1, c.count(), "B1"); + +// Two more examples, one where the index is centered at the origin, +// one not. 
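// (Worked numbers for the two cases below, taking the index center to be the
// midpoint of min/max: with min -2 / max 2 the center is (0,0), which the box
// [[.1,.1],[1.99,1.99]] excludes while containing the point (1.0,1.0); with
// min 0 / max 4 the center is (2,2), which [[2.05,2.05],[3.99,3.99]] excludes
// while containing (3.9,3.9).)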
+t=db.geo_box3; +t.drop(); +t.insert({ point : { x : 1.0 , y : 1.0 } }); +t.ensureIndex( { point : "2d" } , { min : -2 , max : 2 } ); +var c=t.find({point: {"$within": {"$box": [[.1, .1], [1.99, 1.99]]} } }); +assert.eq(1, c.count(), "C1"); + +t=db.geo_box3; +t.drop(); +t.insert({ point : { x : 3.9 , y : 3.9 } }); +t.ensureIndex( { point : "2d" } , { min : 0 , max : 4 } ); +var c=t.find({point: {"$within": {"$box": [[2.05, 2.05], [3.99, 3.99]]} } }); +assert.eq(1, c.count(), "D1"); diff -Nru mongodb-1.4.4/jstests/geo_circle1.js mongodb-1.6.3/jstests/geo_circle1.js --- mongodb-1.4.4/jstests/geo_circle1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/geo_circle1.js 2010-09-24 10:02:42.000000000 -0700 @@ -36,7 +36,7 @@ //printjson( Array.sort( t.find(q).map( function(z){ return z._id; } ) ) ) assert.eq( correct[i].length , t.find( q ).itcount() , "itcount : " + tojson( searches[i] ) ); - assert.eq( correct[i].length , t.find( q ).itcount() , "count : " + tojson( searches[i] ) ); + assert.eq( correct[i].length , t.find( q ).count() , "count : " + tojson( searches[i] ) ); assert.gt( correct[i].length * 2 , t.find(q).explain().nscanned , "nscanned : " + tojson( searches[i] ) ) } diff -Nru mongodb-1.4.4/jstests/geo_circle2.js mongodb-1.6.3/jstests/geo_circle2.js --- mongodb-1.4.4/jstests/geo_circle2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_circle2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,23 @@ + +t = db.geo_circle2; +t.drop(); + +t.ensureIndex({loc : "2d", categories:1}, {"name":"placesIdx", "min": -100, "max": 100}); + +t.insert({ "uid" : 368900 , "loc" : { "x" : -36 , "y" : -8} ,"categories" : [ "sports" , "hotel" , "restaurant"]}); +t.insert({ "uid" : 555344 , "loc" : { "x" : 13 , "y" : 29} ,"categories" : [ "sports" , "hotel"]}); +t.insert({ "uid" : 855878 , "loc" : { "x" : 38 , "y" : 30} ,"categories" : [ "sports" , "hotel"]}); +t.insert({ "uid" : 917347 , "loc" : { "x" : 15 , "y" : 46} ,"categories" : [ "hotel"]}); +t.insert({ "uid" : 647874 , "loc" : { "x" : 25 , "y" : 23} ,"categories" : [ "hotel" , "restaurant"]}); +t.insert({ "uid" : 518482 , "loc" : { "x" : 4 , "y" : 25} ,"categories" : [ ]}); +t.insert({ "uid" : 193466 , "loc" : { "x" : -39 , "y" : 22} ,"categories" : [ "sports" , "hotel"]}); +t.insert({ "uid" : 622442 , "loc" : { "x" : -24 , "y" : -46} ,"categories" : [ "hotel"]}); +t.insert({ "uid" : 297426 , "loc" : { "x" : 33 , "y" : -49} ,"categories" : [ "hotel"]}); +t.insert({ "uid" : 528464 , "loc" : { "x" : -43 , "y" : 48} ,"categories" : [ "restaurant"]}); +t.insert({ "uid" : 90579 , "loc" : { "x" : -4 , "y" : -23} ,"categories" : [ "restaurant"]}); +t.insert({ "uid" : 368895 , "loc" : { "x" : -8 , "y" : 14} ,"categories" : [ "sports" ]}); +t.insert({ "uid" : 355844 , "loc" : { "x" : 34 , "y" : -4} ,"categories" : [ "sports" , "hotel"]}); + + +assert.eq( 10 , t.find({ "loc" : { "$within" : { "$center" : [ { "x" : 0 ,"y" : 0} , 50]}} } ).itcount() , "A" ); +assert.eq( 6 , t.find({ "loc" : { "$within" : { "$center" : [ { "x" : 0 ,"y" : 0} , 50]}}, "categories" : "sports" } ).itcount() , "B" ); diff -Nru mongodb-1.4.4/jstests/geo_circle3.js mongodb-1.6.3/jstests/geo_circle3.js --- mongodb-1.4.4/jstests/geo_circle3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_circle3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,28 @@ +// SERVER-848 and SERVER-1191. 
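// (Geometry note: around [5,52] with radius 0.0001, the four axis-aligned
// neighbors lie at distance exactly 0.0001 and the four diagonal ones at about
// 0.000141, so 5 of the 9 points should fall inside the circle; geo_circle4.js
// below asserts exactly that count.)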
+db.places.drop() + +n = 0; +db.places.save({ "_id": n++, "loc" : { "x" : 4.9999, "y" : 52 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5, "y" : 52 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5.0001, "y" : 52 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5, "y" : 52.0001 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5, "y" : 51.9999 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 4.9999, "y" : 52.0001 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5.0001, "y" : 52.0001 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 4.9999, "y" : 51.9999 } }) +db.places.save({ "_id": n++, "loc" : { "x" : 5.0001, "y" : 51.9999 } }) +db.places.ensureIndex( { loc : "2d" } ) +radius=0.0001 +center=[5,52] +//print(db.places.find({"loc" : {"$within" : {"$center" : [center, radius]}}}).count()) +// FIXME: we want an assert, e.g., that there be 5 answers in the find(). +db.places.find({"loc" : {"$within" : {"$center" : [center, radius]}}}).forEach(printjson); + + +// the result: +// { "_id" : ObjectId("4bb1f2f088df513435bcb4e1"), "loc" : { "x" : 5, "y" : 52 } } +// { "_id" : ObjectId("4bb1f54383459c40223a8ae7"), "loc" : { "x" : 5, "y" : 51.9999 } } +// { "_id" : ObjectId("4bb1f54583459c40223a8aeb"), "loc" : { "x" : 5.0001, "y" : 51.9999 } } +// { "_id" : ObjectId("4bb1f2e588df513435bcb4e0"), "loc" : { "x" : 4.9999, "y" : 52 } } +// { "_id" : ObjectId("4bb1f30888df513435bcb4e2"), "loc" : { "x" : 5.0001, "y" : 52 } } +// { "_id" : ObjectId("4bb1f54383459c40223a8ae8"), "loc" : { "x" : 4.9999, "y" : 52.0001 } } diff -Nru mongodb-1.4.4/jstests/geo_circle4.js mongodb-1.6.3/jstests/geo_circle4.js --- mongodb-1.4.4/jstests/geo_circle4.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_circle4.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,24 @@ +// Reported as server-848. +db.server848.drop(); + +radius=0.0001; +center=[5,52]; + +db.server848.save({ "_id": 1, "loc" : { "x" : 4.9999, "y" : 52 } }); +db.server848.save({ "_id": 2, "loc" : { "x" : 5, "y" : 52 } }); +db.server848.save({ "_id": 3, "loc" : { "x" : 5.0001, "y" : 52 } }); +db.server848.save({ "_id": 4, "loc" : { "x" : 5, "y" : 52.0001 } }); +db.server848.save({ "_id": 5, "loc" : { "x" : 5, "y" : 51.9999 } }); +db.server848.save({ "_id": 6, "loc" : { "x" : 4.9999, "y" : 52.0001 } }); +db.server848.save({ "_id": 7, "loc" : { "x" : 5.0001, "y" : 52.0001 } }); +db.server848.save({ "_id": 8, "loc" : { "x" : 4.9999, "y" : 51.9999 } }); +db.server848.save({ "_id": 9, "loc" : { "x" : 5.0001, "y" : 51.9999 } }); +db.server848.ensureIndex( { loc : "2d" } ); +r=db.server848.find({"loc" : {"$within" : {"$center" : [center, radius]}}}, {_id:1}); +assert.eq(5, r.count(), "A1"); +// FIXME: surely code like this belongs in utils.js. +a=r.toArray(); +x=[]; +for (k in a) { x.push(a[k]["_id"]) } +x.sort() +assert.eq([1,2,3,4,5], x, "B1"); diff -Nru mongodb-1.4.4/jstests/geo_circle5.js mongodb-1.6.3/jstests/geo_circle5.js --- mongodb-1.4.4/jstests/geo_circle5.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_circle5.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,28 @@ +// reported as server-1238. 
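// (All four documents below share loc [5000000,900000]; the first circle is
// centered on that point and the second is centered one unit away with radius
// 5, so both counts should come back as 4, which is what A1 and B1 check.)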
+ +db.server1238.drop(); +db.server1238.remove() +db.server1238.save({ loc: [ 5000000, 900000 ], id: 1}) +db.server1238.save({ loc: [ 5000000, 900000 ], id: 2}) +db.server1238.ensureIndex( { loc : "2d" } , { min : -21000000 , max : 21000000 } ) +db.server1238.save({ loc: [ 5000000, 900000 ], id: 3}) +db.server1238.save({ loc: [ 5000000, 900000 ], id: 4}) + +c1=db.server1238.find({"loc" : {"$within" : {"$center" : [[5000000, 900000], 1.0]}}}).count() + +c2=db.server1238.find({"loc" : {"$within" : {"$center" : [[5000001, 900000], 5.0]}}}).count() + + +assert.eq(4, c1, "A1"); +assert.eq(c1, c2, "B1"); +//print(db.server1238.find({"loc" : {"$within" : {"$center" : [[5000001, 900000], 5.0]}}}).toArray()); +// [ +// { +// "_id" : ObjectId("4c173306f5d9d34a46cb7b11"), +// "loc" : [ +// 5000000, +// 900000 +// ], +// "id" : 4 +// } +// ] \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/geod.js mongodb-1.6.3/jstests/geod.js --- mongodb-1.4.4/jstests/geod.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geod.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,14 @@ +var t=db.geod; +t.drop() +t.save( { loc: [0,0] } ) +t.save( { loc: [0.5,0] } ) +t.ensureIndex({loc:"2d"}) +// do a few geoNears with different maxDistances. The first iteration +// should match no points in the dataset. +dists = [.49, .51, 1.0] +for (idx in dists){ + b=db.runCommand({geoNear:"geod", near:[1,0], num:2, maxDistance:dists[idx]}); + assert.eq(b.errmsg, undefined, "A"+idx); + l=b.results.length + assert.eq(l, idx, "B"+idx) +} diff -Nru mongodb-1.4.4/jstests/geoe.js mongodb-1.6.3/jstests/geoe.js --- mongodb-1.4.4/jstests/geoe.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geoe.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,32 @@ +// Was reported as SERVER-1283. +// The problem seems to be that sometimes the index btrees are such that +// the first search for a matching point in the geo code could run to +// the end of the btree and not reverse direction (leaving the rest of +// the search always looking at some random non-matching point). 
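// (Point bookkeeping for the asserts below: _id 1 sits far outside both boxes
// and _ids 2 and 3 fall below the boxes' y range, leaving the remaining 11
// points as matches for each query.)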
+ +t=db.geo_box; +t.drop(); + +t.insert({"_id": 1, "geo" : [ 33, -11.1 ] }); +t.insert({"_id": 2, "geo" : [ -122, 33.3 ] }); +t.insert({"_id": 3, "geo" : [ -122, 33.4 ] }); +t.insert({"_id": 4, "geo" : [ -122.28, 37.67 ] }); +t.insert({"_id": 5, "geo" : [ -122.29, 37.68 ] }); +t.insert({"_id": 6, "geo" : [ -122.29, 37.67 ] }); +t.insert({"_id": 7, "geo" : [ -122.29, 37.67 ] }); +t.insert({"_id": 8, "geo" : [ -122.29, 37.68 ] }); +t.insert({"_id": 9, "geo" : [ -122.29, 37.68 ] }); +t.insert({"_id": 10, "geo" : [ -122.3, 37.67 ] }); +t.insert({"_id": 11, "geo" : [ -122.31, 37.67 ] }); +t.insert({"_id": 12, "geo" : [ -122.3, 37.66 ] }); +t.insert({"_id": 13, "geo" : [ -122.2435, 37.637072 ] }); +t.insert({"_id": 14, "geo" : [ -122.289505, 37.695774 ] }); + + +t.ensureIndex({ geo : "2d" }); + +c=t.find({geo: {"$within": {"$box": [[-125.078461,36.494473], [-120.320648,38.905199]]} } }); +assert.eq(11, c.count(), "A1"); + +c=t.find({geo: {"$within": {"$box": [[-124.078461,36.494473], [-120.320648,38.905199]]} } }); +assert.eq(11, c.count(), "B1"); diff -Nru mongodb-1.4.4/jstests/geo_haystack1.js mongodb-1.6.3/jstests/geo_haystack1.js --- mongodb-1.4.4/jstests/geo_haystack1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/geo_haystack1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,59 @@ + +t = db.geo_haystack1 +t.drop() + +function distance( a , b ){ + var x = a[0] - b[0]; + var y = a[1] - b[1]; + return Math.sqrt( ( x * x ) + ( y * y ) ); +} + +function distanceTotal( a , arr , f ){ + var total = 0; + for ( var i=0; i queries[i].maxDistance ) + continue; + if ( queries[i].search.z != n % 5 ) + continue; + answers[i].results.push( { _id : n , loc : [ x , y ]} ) + answers[i].totalDistance += d; + } + + n++; + } +} + +t.ensureIndex( { loc : "geoHaystack" , z : 1 } , { bucketSize : .7 } ); + +for ( i=0; i queries[i].maxDistance ) + continue; + if ( queries[i].search.z != n % 10 && + queries[i].search.z != ( n + 5 ) % 10 ) + continue; + answers[i].results.push( { _id : n , loc : [ x , y ] } ) + answers[i].totalDistance += d; + } + + n++; + } +} + +t.ensureIndex( { loc : "geoHaystack" , z : 1 } , { bucketSize : .7 } ); + +for ( i=0; i ib.b[ 1 ][ 0 ]" ); +ib = t.find( {a:2,b:{$in:[3,4]}} ).sort( {a:-1,b:1} ).explain().indexBounds; +checkRanges( {a:[[2,2]],b:[[3,3],[4,4]]}, ib ); +assert.automsg( "ib.b[ 0 ][ 0 ] < ib.b[ 1 ][ 0 ]" ); diff -Nru mongodb-1.4.4/jstests/in5.js mongodb-1.6.3/jstests/in5.js --- mongodb-1.4.4/jstests/in5.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/in5.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,56 @@ + +t = db.in5 + +function go( fn ){ + t.drop(); + o = {}; + o[fn] = { a : 1 , b : 2 }; + t.insert( o ); + + x = {}; + x[fn] = { a : 1 , b : 2 }; + assert.eq( 1 , t.find( x ).itcount() , "A1 - " + fn ); + + + y = {}; + y[fn] = { $in : [ { a : 1 , b : 2 } ] } + assert.eq( 1 , t.find( y ).itcount() , "A2 - " + fn ); + + + z = {}; + z[fn+".a"] = 1; + z[fn+".b"] = { $in : [ 2 ] } + assert.eq( 1 , t.find( z ).itcount() , "A3 - " + fn ); // SERVER-1366 + + + i = {} + i[fn] = 1 + t.ensureIndex( i ) + + assert.eq( 1 , t.find( x ).itcount() , "B1 - " + fn ); + assert.eq( 1 , t.find( y ).itcount() , "B2 - " + fn ); + assert.eq( 1 , t.find( z ).itcount() , "B3 - " + fn ); // SERVER-1366 + + t.dropIndex( i ) + + assert.eq( 1 , t.getIndexes().length , "T2" ); + + i = {} + i[fn + ".a" ] = 1; + t.ensureIndex( i ) + assert.eq( 2 , t.getIndexes().length , "T3" ); + + assert.eq( 1 , t.find( x ).itcount() , "C1 - " + fn ); + assert.eq( 1 , t.find( y 
).itcount() , "C2 - " + fn ); + assert.eq( 1 , t.find( z ).itcount() , "C3 - " + fn ); // SERVER-1366 + + t.dropIndex( i ) + + +} + +go( "x" ); +go( "_id" ) + + + diff -Nru mongodb-1.4.4/jstests/in6.js mongodb-1.6.3/jstests/in6.js --- mongodb-1.4.4/jstests/in6.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/in6.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,13 @@ +t = db.jstests_in6; +t.drop(); + +t.save( {} ); + +function doTest() { + assert.eq.automsg( "1", "t.count( {i:null} )" ); + assert.eq.automsg( "1", "t.count( {i:{$in:[null]}} )" ); +} + +doTest(); +t.ensureIndex( {i:1} ); +doTest(); diff -Nru mongodb-1.4.4/jstests/in7.js mongodb-1.6.3/jstests/in7.js --- mongodb-1.4.4/jstests/in7.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/in7.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,6 @@ +t = db.jstests_slow_in1; + +t.drop(); +t.ensureIndex( {a:1,b:1,c:1,d:1,e:1,f:1} ); +i = {$in:[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]}; +assert.throws.automsg( function() { t.count( {a:i,b:i,c:i,d:i,e:i,f:i} ); } ); diff -Nru mongodb-1.4.4/jstests/index10.js mongodb-1.6.3/jstests/index10.js --- mongodb-1.4.4/jstests/index10.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index10.js 2010-09-24 10:02:42.000000000 -0700 @@ -14,9 +14,9 @@ t.dropIndexes(); t.ensureIndex( {i:1}, true ); err = db.getLastErrorObj(); -assert( err.err ); +assert( err.err , "err.err" ); assert.eq( 11000, err.code ); -assert.eq( 1, db.system.indexes.count( {ns:"test.jstests_index10" } ) ); // only id index +assert( 1 == db.system.indexes.count( {ns:"test.jstests_index10" } ), "only id index" ); // t.dropIndexes(); ts = t.totalIndexSize(); diff -Nru mongodb-1.4.4/jstests/index1.js mongodb-1.6.3/jstests/index1.js --- mongodb-1.4.4/jstests/index1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index1.js 2010-09-24 10:02:42.000000000 -0700 @@ -17,9 +17,9 @@ o = { name : "bar" , z : { a : 18 } }; t.save( o ); -assert( t.find().length() == 2 ); -assert( t.find().sort( { "z.a" : 1 } ).length() == 2 ); -assert( t.find().sort( { "z.a" : -1 } ).length() == 2 ); +assert.eq.automsg( "2", "t.find().length()" ); +assert.eq.automsg( "2", "t.find().sort( { 'z.a' : 1 } ).length()" ); +assert.eq.automsg( "2", "t.find().sort( { 'z.a' : -1 } ).length()" ); // We are planning to phase out this syntax. assert( t.find().sort( { z : { a : 1 } } ).length() == 2 ); assert( t.find().sort( { z : { a: -1 } } ).length() == 2 ); diff -Nru mongodb-1.4.4/jstests/index6.js mongodb-1.6.3/jstests/index6.js --- mongodb-1.4.4/jstests/index6.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index6.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,6 +1,6 @@ // index6.js Test indexes on array subelements. 
-r = db.ed.db.index5; +r = db.ed.db.index6; r.drop(); r.save( { comments : [ { name : "eliot", foo : 1 } ] } ); diff -Nru mongodb-1.4.4/jstests/index7.js mongodb-1.6.3/jstests/index7.js --- mongodb-1.4.4/jstests/index7.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index7.js 2010-09-24 10:02:42.000000000 -0700 @@ -9,12 +9,14 @@ } function start( k, q, rev) { - var s = q.explain().indexBounds[rev?1:0][0]; + var exp = q.explain().indexBounds; + var s = {a:exp.a[rev?1:0][0],b:exp.b[0][0]}; assert.eq( k.a, s.a ); assert.eq( k.b, s.b ); } function end( k, q, rev) { - var e = q.explain().indexBounds[rev?1:0][1]; + var exp = q.explain().indexBounds + var e = {a:exp.a[rev?1:0][1],b:exp.b[0][1]}; assert.eq( k.a, e.a ); assert.eq( k.b, e.b ); } @@ -33,12 +35,12 @@ f.drop(); f.ensureIndex( { a: 1, b: 1 } ); -assert.eq( 1, f.find( { a: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][0].a ); -assert.eq( 1, f.find( { a: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][1].a ); -assert.eq( 1, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][0].a ); -assert.eq( 1, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][1].a ); -assert.eq( null, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][0].c ); -assert.eq( null, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds[0][1].c ); +assert.eq( 1, f.find( { a: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.a[0][0] ); +assert.eq( 1, f.find( { a: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.a[0][1] ); +assert.eq( 1, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.a[0][0] ); +assert.eq( 1, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.a[0][1] ); +assert.eq( null, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.c ); +assert.eq( null, f.find( { a: 1, c: 1 } ).hint( { a: 1, b: 1 } ).explain().indexBounds.c ); start( { a: "a", b: 1 }, f.find( { a: /^a/, b: 1 } ).hint( { a: 1, b: 1 } ) ); start( { a: "a", b: 1 }, f.find( { a: /^a/, b: 1 } ).sort( { a: 1, b: 1 } ).hint( { a: 1, b: 1 } ) ); diff -Nru mongodb-1.4.4/jstests/indexapi.js mongodb-1.6.3/jstests/indexapi.js --- mongodb-1.4.4/jstests/indexapi.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/indexapi.js 2010-09-24 10:02:42.000000000 -0700 @@ -34,7 +34,7 @@ assert.eq( 2 , idx.length , "M1" ); assert.eq( key , idx[1].key , "M2" ); assert( idx[1].unique , "M3" ); -printjson( idx ); +//printjson( idx ); db.system.indexes.insert( { ns : "test" , key : { x : 1 } , name : "x" } ); assert( db.getLastError().indexOf( "invalid" ) >= 0 , "Z1" ); diff -Nru mongodb-1.4.4/jstests/index_check2.js mongodb-1.6.3/jstests/index_check2.js --- mongodb-1.4.4/jstests/index_check2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index_check2.js 2010-09-24 10:02:42.000000000 -0700 @@ -38,4 +38,4 @@ assert( scanned3 <= Math.max( scanned1 , scanned2 ) , "$all makes query optimizer not work well" ); exp3 = t.find( q3 ).explain(); -assert.eq( exp3.indexBounds[0][0], exp3.indexBounds[0][1], "$all range not a single key" ); +assert.eq( exp3.indexBounds.tags[0][0], exp3.indexBounds.tags[0][1], "$all range not a single key" ); diff -Nru mongodb-1.4.4/jstests/index_check6.js mongodb-1.6.3/jstests/index_check6.js --- mongodb-1.4.4/jstests/index_check6.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index_check6.js 2010-09-24 10:02:42.000000000 -0700 @@ -12,6 +12,54 @@ assert.eq( 10 , t.find( { age : 30 } 
).explain().nscanned , "A" ); assert.eq( 20 , t.find( { age : { $gte : 29 , $lte : 30 } } ).explain().nscanned , "B" ); +assert.eq( 12 , t.find( { age : { $gte : 25 , $lte : 30 }, rating: {$in: [0,9] } } ).explain().nscanned , "C1" ); + +assert.eq( 2 , t.find( { age : { $gte : 29 , $lte : 30 } , rating : 5 } ).explain().nscanned , "C" ); // SERVER-371 +assert.eq( 4 , t.find( { age : { $gte : 29 , $lte : 30 } , rating : { $gte : 4 , $lte : 5 } } ).explain().nscanned , "D" ); // SERVER-371 + +assert.eq.automsg( "2", "t.find( { age:30, rating:{ $gte:4, $lte:5} } ).explain().nscanned" ); + +t.drop(); + +for ( var a=1; a<10; a++ ){ + for ( var b=0; b<10; b++ ){ + for ( var c=0; c<10; c++ ) { + t.save( { a:a, b:b, c:c } ); + } + } +} + +function doTest( s ) { + sort = s; +assert.eq.automsg( "1", "t.find( { a:5, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "2", "t.find( { a:5, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:5, b:5, c:{$gte:5.5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:5, b:5, c:{$gte:5,$lte:5.5} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "3", "t.find( { a:5, b:5, c:{$gte:5,$lte:7} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "2", "t.find( { a:5, b:{$gte:5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:5, b:{$gte:5.5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:5, b:{$gte:5,$lte:5.5}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "3", "t.find( { a:5, b:{$gte:5,$lte:7}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "2", "t.find( { a:{$gte:5,$lte:6}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "1", "t.find( { a:{$gte:5,$lte:5.5}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "3", "t.find( { a:{$gte:5,$lte:7}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "4", "t.find( { a:{$gte:5,$lte:6}, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "2", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "4", "t.find( { a:5, b:{$gte:5,$lte:6}, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "4", "t.find( { a:{$gte:5,$lte:6}, b:{$gte:5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "8", "t.find( { a:{$gte:5,$lte:6}, b:{$gte:5,$lte:6}, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +} + +for ( var a = -1; a <= 1; a += 2 ) { + for( var b = -1; b <= 1; b += 2 ) { + for( var c = -1; c <= 1; c += 2 ) { + t.dropIndexes(); + var spec = {a:a,b:b,c:c}; + t.ensureIndex( spec ); + doTest( spec ); + doTest( {a:-a,b:-b,c:-c} ); + } + } +} -//assert.eq( 2 , t.find( { age : { $gte : 29 , $lte : 30 } , rating : 5 } ).explain().nscanned , "C" ); // SERVER-371 -//assert.eq( 4 , t.find( { age : { $gte : 29 , $lte : 30 } , rating : { $gte : 4 , $lte : 5 } } ).explain().nscanned , "D" ); // SERVER-371 diff -Nru mongodb-1.4.4/jstests/index_check8.js mongodb-1.6.3/jstests/index_check8.js --- mongodb-1.4.4/jstests/index_check8.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/index_check8.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,15 @@ + +t = db.index_check8 +t.drop(); + +t.insert( { a : 
1 , b : 1 , c : 1 , d : 1 , e : 1 } ) +t.ensureIndex( { a : 1 , b : 1 , c : 1 } ) +t.ensureIndex( { a : 1 , b : 1 , d : 1 , e : 1 } ) + +x = t.find( { a : 1 , b : 1 , d : 1 } ).sort( { e : 1 } ).explain() +assert( ! x.scanAndOrder , "A : " + tojson( x ) ) + +x = t.find( { a : 1 , b : 1 , c : 1 , d : 1 } ).sort( { e : 1 } ).explain() +//assert( ! x.scanAndOrder , "B : " + tojson( x ) ) + + diff -Nru mongodb-1.4.4/jstests/indexe.js mongodb-1.6.3/jstests/indexe.js --- mongodb-1.4.4/jstests/indexe.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/indexe.js 2010-09-24 10:02:42.000000000 -0700 @@ -12,7 +12,7 @@ assert.eq( num , t.find( { a : "b" } ).count() , "B1" ); assert.eq( num , t.find( { a : "b" } ).itcount() , "C1" ); -t.ensureIndex( { a : "b" } ); +t.ensureIndex( { a : 1 } ); assert.eq( num , t.find().count() ,"A2" ); assert.eq( num , t.find().sort( { a : 1 } ).count() , "A2a" ); diff -Nru mongodb-1.4.4/jstests/index_elemmatch1.js mongodb-1.6.3/jstests/index_elemmatch1.js --- mongodb-1.4.4/jstests/index_elemmatch1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/index_elemmatch1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,28 @@ + +t = db.index_elemmatch1 +t.drop() + +x = 0 +y = 0 +for ( a=0; a<100; a++ ){ + for ( b=0; b<100; b++ ){ + t.insert( { a : a , b : b % 10 , arr : [ { x : x++ % 10 , y : y++ % 10 } ] } ) + } +} + +t.ensureIndex( { a : 1 , b : 1 } ) +t.ensureIndex( { "arr.x" : 1 , a : 1 } ) + +assert.eq( 100 , t.find( { a : 55 } ).itcount() , "A1" ); +assert.eq( 10 , t.find( { a : 55 , b : 7 } ).itcount() , "A2" ); + +q = { a : 55 , b : { $in : [ 1 , 5 , 8 ] } } +assert.eq( 30 , t.find( q ).itcount() , "A3" ) + +q.arr = { $elemMatch : { x : 5 , y : 5 } } +assert.eq( 10 , t.find( q ).itcount() , "A4" ) + +assert.eq( t.find(q).itcount() , t.find(q).explain().nscanned , "A5" ) + + + diff -Nru mongodb-1.4.4/jstests/indexh.js mongodb-1.6.3/jstests/indexh.js --- mongodb-1.4.4/jstests/indexh.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/indexh.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,34 @@ +// This should get skipped when testing replication + +t = db.jstests_indexh; + +function debug( t ) { + print( t ); +} + +// index extent freeing +t.drop(); +t.save( {} ); +var s1 = db.stats().dataSize; +debug( "s1: " + s1 ); +t.ensureIndex( {a:1} ); +var s2 = db.stats().dataSize; +debug( "s2: " + s2 ); +assert.automsg( "s1 < s2" ); +t.dropIndex( {a:1} ); +var s3 = db.stats().dataSize; +debug( "s3: " + s3 ); +assert.eq.automsg( "s1", "s3" ); + +// index node freeing +t.drop(); +t.ensureIndex( {a:1} ); +for( i = 'a'; i.length < 500; i += 'a' ) { + t.save( {a:i} ); +} +var s4 = db.stats().indexSize; +debug( "s4: " + s4 ); +t.remove( {} ); +var s5 = db.stats().indexSize; +debug( "s5: " + s5 ); +assert.automsg( "s5 < s4" ); \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/index_many2.js mongodb-1.6.3/jstests/index_many2.js --- mongodb-1.4.4/jstests/index_many2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/index_many2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,29 @@ + +t = db.index_many2; +t.drop() + +t.save( { x : 1 } ) + +assert.eq( 1 , t.getIndexKeys().length , "A1" ) + +function make( n ){ + var x = {} + x["x"+n] = 1; + return x; +} + +for ( i=1; i<1000; i++ ){ + t.ensureIndex( make(i) ); +} + +assert.eq( 64 , t.getIndexKeys().length , "A2" ) + + +num = t.getIndexKeys().length + +t.dropIndex( make(num-1) ) +assert.eq( num - 1 , t.getIndexKeys().length , "B0" ) + +t.ensureIndex( { z : 1 } ) +assert.eq( 
num , t.getIndexKeys().length , "B1" ) + diff -Nru mongodb-1.4.4/jstests/index_many.js mongodb-1.6.3/jstests/index_many.js --- mongodb-1.4.4/jstests/index_many.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/index_many.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,34 +1,51 @@ -t = db.many; - -t.drop(); -db.many2.drop(); - -t.save({x:9}); -t.save({x:19}); - -x = 2; -while( x < 60 ) { - patt={}; - patt[x] = 1; - if( x == 20 ) - patt = { x : 1 }; - t.ensureIndex(patt); - x++; -} - -// print( tojson(db.getLastErrorObj()) ); -assert( db.getLastError(), "should have an error 'too many indexes'" ); +/* test using lots of indexes on one collection */ -// 40 is the limit currently +t = db.many; -// print( t.getIndexes().length == 40, "40" ); +function f() { -assert( t.getIndexes().length == 40, "40" ); + t.drop(); + db.many2.drop(); -assert( t.find({x:9}).length() == 1, "b" ) ; + t.save({ x: 9, y : 99 }); + t.save({ x: 19, y : 99 }); -t.renameCollection( "many2" ); + x = 2; + while (x < 70) { + patt = {}; + patt[x] = 1; + if (x == 20) + patt = { x: 1 }; + if (x == 64) + patt = { y: 1 }; + t.ensureIndex(patt); + x++; + } + + // print( tojson(db.getLastErrorObj()) ); + assert(db.getLastError(), "should have got an error 'too many indexes'"); + + // 64 is the limit currently + lim = t.getIndexes().length; + if (lim != 64) { + print("# of indexes should be 64 but is : " + lim); + return; + } + assert(lim == 64, "not 64 indexes"); + + assert(t.find({ x: 9 }).length() == 1, "b"); + assert(t.find({ x: 9 }).explain().cursor.match(/Btree/), "not using index?"); + + assert(t.find({ y: 99 }).length() == 2, "y idx"); + assert(t.find({ y: 99 }).explain().cursor.match(/Btree/), "not using y index?"); + + /* check that renamecollection remaps all the indexes right */ + assert(t.renameCollection("many2").ok, "rename failed"); + assert(t.find({ x: 9 }).length() == 0, "many2a"); + assert(db.many2.find({ x: 9 }).length() == 1, "many2b"); + assert(t.find({ y: 99 }).length() == 0, "many2c"); + assert(db.many2.find({ y: 99 }).length() == 2, "many2d"); -assert( t.find({x:9}).length() == 0, "c" ) ; +} -assert( db.many2.find({x:9}).length() == 1, "d" ) ; +f(); diff -Nru mongodb-1.4.4/jstests/maxscan.js mongodb-1.6.3/jstests/maxscan.js --- mongodb-1.4.4/jstests/maxscan.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/maxscan.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,14 @@ + +t = db.maxscan; +t.drop(); + +N = 100; +for ( i=0; i 0" ); + +assert.commandWorked( m.getDB( "local" ).repairDatabase() ); +assert.automsg( "c <= m.getDB( 'local' ).oplog.$main.count()" ); diff -Nru mongodb-1.4.4/jstests/repl/repl10.js mongodb-1.6.3/jstests/repl/repl10.js --- mongodb-1.4.4/jstests/repl/repl10.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/repl/repl10.js 2010-09-24 10:02:42.000000000 -0700 @@ -26,13 +26,15 @@ am.save( {i:2} ); assert.eq( 2, am.count() ); sleep( 3000 ); - - rt.stop( true, signal ); - sleep( 3000 ); assert.eq( 1, s.getDB( baseName ).a.count() ); + soonCount( 2 ); + rt.stop(); } -doTest( 15 ); // SIGTERM -doTest( 9 ); // SIGKILL +print("repl10.js dotest(15)"); +doTest(15); // SIGTERM +print("repl10.js dotest(9)"); +doTest(9); // SIGKILL +print("repl10.js SUCCESS"); diff -Nru mongodb-1.4.4/jstests/repl/repl12.js mongodb-1.6.3/jstests/repl/repl12.js --- mongodb-1.4.4/jstests/repl/repl12.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/repl/repl12.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,47 @@ +// SERVER-1626 +// check for initial sync of 
multiple db's + +function debug( x ) { + print( "DEBUG:" + tojson( x ) ); +} + +rt = new ReplTest( "repl12tests" ); + +m = rt.start( true ); + +usedDBs = [] + +a = "a" +for( i = 0; i < 3; ++i ) { + usedDBs.push( a ) + m.getDB( a ).c.save( {} ); + a += "a"; +} +m.getDB(a).getLastError(); + +//print("\n\n\n DB NAMES MASTER:"); +//printjson(m.getDBNames()); + +var z = 10500; +print("sleeping " + z + "ms"); +sleep(z); + +s = rt.start(false); + +function countHave(){ + var have = 0; + for ( var i=0; i= 1, "count check"); + doWritesToKeep2(a); + + // A is 1 2 3 7 8 + // B is 1 2 3 4 5 6 + + // bring B back online + // as A is primary, B will roll back and then catch up + B.runCommand({ replSetTest: 1, blind: false }); + + wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); + + // everyone is up here... + assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); + assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); + + verify(a); + + assert( dbs_match(a,b), "server data sets do not match after rollback, something is wrong"); + + pause("rollback2.js SUCCESS"); + replTest.stopSet(signal); +} + +print("rollback2.js"); + +doTest( 15 ); diff -Nru mongodb-1.4.4/jstests/replsets/rollback3.js mongodb-1.6.3/jstests/replsets/rollback3.js --- mongodb-1.4.4/jstests/replsets/rollback3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/replsets/rollback3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,224 @@ +// test rollback in replica sets + +// try running as : +// +// mongo --nodb rollback.js | tee out | grep -v ^m31 +// + +var debugging = 0; + +function pause(s) { + print(s); + while (debugging) { + sleep(3000); + print(s); + } +} + +function deb(obj) { + if( debugging ) { + print("\n\n\n" + obj + "\n\n"); + } +} + +w = 0; + +function wait(f) { + w++; + var n = 0; + while (!f()) { + if (n % 4 == 0) + print("rollback3.js waiting " + w); + if (++n == 4) { + print("" + f); + } + if (n == 200) { + print("rollback3.js failing waited too long"); + throw "wait error"; + } + sleep(1000); + } +} + +function dbs_match(a, b) { + print("dbs_match"); + + var ac = a.system.namespaces.find().sort({name:1}).toArray(); + var bc = b.system.namespaces.find().sort({name:1}).toArray(); + if (!friendlyEqual(ac, bc)) { + print("dbs_match: namespaces don't match"); + print("\n\n"); + printjson(ac); + print("\n\n"); + printjson(bc); + print("\n\n"); + return false; + } + + var c = a.getCollectionNames(); + for( var i in c ) { + print("checking " + c[i]); + // system.indexes doesn't have _id so the more involved sort here: + if (!friendlyEqual(a[c[i]].find().sort({ _id: 1, ns:1, name:1 }).toArray(), b[c[i]].find().sort({ _id: 1, ns:1,name:1 }).toArray())) { + print("dbs_match: collections don't match " + c[i]); + if (a[c[i]].count() < 12) { + printjson(a[c[i]].find().sort({ _id: 1 }).toArray()); + printjson(b[c[i]].find().sort({ _id: 1 }).toArray()); + } + return false; + } + } + return true; +} + +/* these writes will be initial data and replicate everywhere. 
*/ +function doInitialWrites(db) { + db.b.insert({ x: 1 }); + db.b.ensureIndex({ x: 1 }); + db.oldname.insert({ y: 1 }); + db.oldname.insert({ y: 2 }); + db.oldname.ensureIndex({ y: 1 },true); + t = db.bar; + t.insert({ q:0}); + t.insert({ q: 1, a: "foo" }); + t.insert({ q: 2, a: "foo", x: 1 }); + t.insert({ q: 3, bb: 9, a: "foo" }); + t.insert({ q: 40333333, a: 1 }); + for (var i = 0; i < 200; i++) t.insert({ i: i }); + t.insert({ q: 40, a: 2 }); + t.insert({ q: 70, txt: 'willremove' }); + + db.createCollection("kap", { capped: true, size: 5000 }); + db.kap.insert({ foo: 1 }) +} + +/* these writes on one primary only and will be rolled back. */ +function doItemsToRollBack(db) { + t = db.bar; + t.insert({ q: 4 }); + t.update({ q: 3 }, { q: 3, rb: true }); + + t.remove({ q: 40 }); // multi remove test + + t.update({ q: 2 }, { q: 39, rb: true }); + + // rolling back a delete will involve reinserting the item(s) + t.remove({ q: 1 }); + + t.update({ q: 0 }, { $inc: { y: 1} }); + + db.kap.insert({ foo: 2 }) + db.kap2.insert({ foo: 2 }) + + // create a collection (need to roll back the whole thing) + db.newcoll.insert({ a: true }); + + // create a new empty collection (need to roll back the whole thing) + db.createCollection("abc"); + + // drop a collection - we'll need all its data back! + t.drop(); + + // drop an index - verify it comes back + db.b.dropIndexes(); + + // two renames in a row, to see if we roll them back transitively + db.oldname.renameCollection("newname"); + db.newname.renameCollection("fooname"); + + assert(db.fooname.count() > 0, "count rename"); + + // test roll back (drop) a whole database + abc = db.getSisterDB("abc"); + abc.foo.insert({ x: 1 }); + abc.bar.insert({ y: 999 }); + + // test making and dropping a database + //mkd = db.getSisterDB("mkd"); + //mkd.c.insert({ y: 99 }); + //mkd.dropDatabase(); +} + +function doWritesToKeep2(db) { + t = db.bar; + t.insert({ txt: 'foo' }); + t.remove({ q: 70 }); + t.update({ q: 0 }, { $inc: { y: 33} }); +} + +doTest = function (signal) { + + var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); + var nodes = replTest.nodeList(); + //print(tojson(nodes)); + + var conns = replTest.startSet(); + var r = replTest.initiate({ "_id": "unicomplex", + "members": [ + { "_id": 0, "host": nodes[0] }, + { "_id": 1, "host": nodes[1] }, + { "_id": 2, "host": nodes[2], arbiterOnly: true}] + }); + + // Make sure we have a master + var master = replTest.getMaster(); + a_conn = conns[0]; + A = a_conn.getDB("admin"); + b_conn = conns[1]; + a_conn.setSlaveOk(); + b_conn.setSlaveOk(); + B = b_conn.getDB("admin"); + assert(master == conns[0], "conns[0] assumed to be master"); + assert(a_conn == master); + + //deb(master); + + // Make sure we have an arbiter + assert.soon(function () { + res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); + return res.myState == 7; + }, "Arbiter failed to initialize."); + + // Wait for initial replication + var a = a_conn.getDB("foo"); + var b = b_conn.getDB("foo"); + doInitialWrites(a); + + // wait for secondary to get this data + wait(function () { return b.bar.count() == a.bar.count(); }); + + A.runCommand({ replSetTest: 1, blind: true }); + wait(function () { return B.isMaster().ismaster; }); + + doItemsToRollBack(b); + + // a should not have the new data as it was in blind state. 
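+    // A sketch of the choreography here, as implemented above and below: while A was
+    // blind, B was primary and took the writes that must later be rolled back. Next we
+    // blind B and unblind A, so A resumes as primary without ever seeing B's writes; the
+    // writes then applied to A are the ones that must survive. When B is finally
+    // unblinded, it finds its oplog has diverged from A's, rolls its extra operations
+    // back, and then catches up from A.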
+ B.runCommand({ replSetTest: 1, blind: true }); + A.runCommand({ replSetTest: 1, blind: false }); + wait(function () { return !B.isMaster().ismaster; }); + wait(function () { return A.isMaster().ismaster; }); + + assert(a.bar.count() >= 1, "count check"); + doWritesToKeep2(a); + + // A is 1 2 3 7 8 + // B is 1 2 3 4 5 6 + + // bring B back online + // as A is primary, B will roll back and then catch up + B.runCommand({ replSetTest: 1, blind: false }); + + wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); + + // everyone is up here... + assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); + assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); + + assert( dbs_match(a,b), "server data sets do not match after rollback, something is wrong"); + + pause("rollback3.js SUCCESS"); + replTest.stopSet(signal); +} + +print("rollback3.js"); +doTest( 15 ); diff -Nru mongodb-1.4.4/jstests/replsets/rollback.js mongodb-1.6.3/jstests/replsets/rollback.js --- mongodb-1.4.4/jstests/replsets/rollback.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/replsets/rollback.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,155 @@ +// test rollback in replica sets + +// try running as : +// +// mongo --nodb rollback.js | tee out | grep -v ^m31 +// + +var debugging = 0; + +function pause(s) { + print(s); + while (debugging) { + sleep(3000); + print(s); + } +} + +function deb(obj) { + if( debugging ) { + print("\n\n\n" + obj + "\n\n"); + } +} + +w = 0; + +function wait(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("rollback.js waiting " + w); + if (++n == 4) { + print("" + f); + } + sleep(1000); + } +} + +doTest = function (signal) { + + var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); + var nodes = replTest.nodeList(); + //print(tojson(nodes)); + + var conns = replTest.startSet(); + var r = replTest.initiate({ "_id": "unicomplex", + "members": [ + { "_id": 0, "host": nodes[0] }, + { "_id": 1, "host": nodes[1] }, + { "_id": 2, "host": nodes[2], arbiterOnly: true}] + }); + + // Make sure we have a master + var master = replTest.getMaster(); + a_conn = conns[0]; + A = a_conn.getDB("admin"); + b_conn = conns[1]; + a_conn.setSlaveOk(); + b_conn.setSlaveOk(); + B = b_conn.getDB("admin"); + assert(master == conns[0], "conns[0] assumed to be master"); + assert(a_conn == master); + + //deb(master); + + // Make sure we have an arbiter + assert.soon(function () { + res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); + return res.myState == 7; + }, "Arbiter failed to initialize."); + + // Wait for initial replication + var a = a_conn.getDB("foo"); + var b = b_conn.getDB("foo"); + + /* force the oplog to roll */ + if (new Date() % 2 == 0) { + print("ROLLING OPLOG AS PART OF TEST (we only do this sometimes)"); + var pass = 1; + var first = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; + a.roll.insert({ x: 1 }); + while (1) { + for (var i = 0; i < 10000; i++) + a.roll.update({}, { $inc: { x: 1} }); + var op = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; + if (tojson(op.h) != tojson(first.h)) { + printjson(op); + printjson(first); + break; + } + pass++; + a.getLastError(2); // unlikely secondary isn't keeping up, but let's avoid possible intermittent issues with that. 
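+            // keep writing until the head entry captured in 'first' has been overwritten,
+            // i.e. the capped oplog has wrapped all the way around; the h (hash)
+            // comparison above is what detects that the head entry changed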
+ } + print("PASSES FOR OPLOG ROLL: " + pass); + } + else { + print("NO ROLL"); + } + + a.bar.insert({ q: 1, a: "foo" }); + a.bar.insert({ q: 2, a: "foo", x: 1 }); + a.bar.insert({ q: 3, bb: 9, a: "foo" }); + + assert(a.bar.count() == 3, "t.count"); + + // wait for secondary to get this data + wait(function () { return b.bar.count() == 3; }); + + A.runCommand({ replSetTest: 1, blind: true }); + wait(function () { return B.isMaster().ismaster; }); + + b.bar.insert({ q: 4 }); + b.bar.insert({ q: 5 }); + b.bar.insert({ q: 6 }); + assert(b.bar.count() == 6, "u.count"); + + // a should not have the new data as it was in blind state. + B.runCommand({ replSetTest: 1, blind: true }); + A.runCommand({ replSetTest: 1, blind: false }); + wait(function () { return !B.isMaster().ismaster; }); + wait(function () { return A.isMaster().ismaster; }); + + assert(a.bar.count() == 3, "t is 3"); + a.bar.insert({ q: 7 }); + a.bar.insert({ q: 8 }); + { + assert(a.bar.count() == 5); + var x = a.bar.find().toArray(); + assert(x[0].q == 1, '1'); + assert(x[1].q == 2, '2'); + assert(x[2].q == 3, '3'); + assert(x[3].q == 7, '7'); + assert(x[4].q == 8, '8'); + } + + // A is 1 2 3 7 8 + // B is 1 2 3 4 5 6 + + // bring B back online + B.runCommand({ replSetTest: 1, blind: false }); + + wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); + + // everyone is up here... + assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); + assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); + + assert( friendlyEqual(a.bar.find().sort({ _id: 1 }).toArray(), b.bar.find().sort({ _id: 1 }).toArray()), "server data sets do not match"); + + pause("rollback.js SUCCESS"); + replTest.stopSet(signal); +} + +print("rollback.js"); +doTest( 15 ); diff -Nru mongodb-1.4.4/jstests/replsets/sync1.js mongodb-1.6.3/jstests/replsets/sync1.js --- mongodb-1.4.4/jstests/replsets/sync1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/replsets/sync1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,211 @@ +// test rollback of replica sets + +var debugging=0; + +w = 0; + +function pause(s) { + // for debugging just to keep processes running + print("\nsync1.js: " + s); + if (debugging) { + while (1) { + print("\nsync1.js: " + s); + sleep(4000); + } + } } + +doTest = function (signal) { + + var replTest = new ReplSetTest({ name: 'testSet', nodes: 3 }); + var nodes = replTest.startSet({ oplogSize: "40" }); + + sleep(5000); + + print("\nsync1.js ********************************************************************** part 0"); + replTest.initiate(); + + // get master + print("\nsync1.js ********************************************************************** part 1"); + var master = replTest.getMaster(); + print("\nsync1.js ********************************************************************** part 2"); + var dbs = [master.getDB("foo")]; + + for (var i in nodes) { + if (nodes[i] + "" == master + "") { + continue; + } + dbs.push(nodes[i].getDB("foo")); + nodes[i].setSlaveOk(); + } + + print("\nsync1.js ********************************************************************** part 3"); + dbs[0].bar.drop(); + + print("\nsync1.js ********************************************************************** part 4"); + // slow things down a bit + dbs[0].bar.ensureIndex({ x: 1 }); + dbs[0].bar.ensureIndex({ y: 1 }); + dbs[0].bar.ensureIndex({ z: 1 }); + dbs[0].bar.ensureIndex({ w: 1 }); + + var ok = false; + var inserts = 100000; + + print("\nsync1.js ********************************************************************** part 
5"); + + for (var i = 0; i < inserts; i++) { + dbs[0].bar.insert({ x: "foo" + i, y: "bar" + i, z: i, w: "biz baz bar boo" }); + } + + var status; + do { + sleep(1000); + status = dbs[0].getSisterDB("admin").runCommand({ replSetGetStatus: 1 }); + } while (status.members[1].state != 2 && status.members[2].state != 2); + + print("\nsync1.js ********************************************************************** part 6"); + dbs[0].getSisterDB("admin").runCommand({ replSetTest: 1, blind: true }); + + print("\nsync1.js ********************************************************************** part 7"); + + sleep(5000); + + var max1; + var max2; + var count = 0; + while (1) { + try { + max1 = dbs[1].bar.find().sort({ z: -1 }).limit(1).next(); + max2 = dbs[2].bar.find().sort({ z: -1 }).limit(1).next(); + } + catch (e) { + print("\nsync1.js couldn't get max1/max2; retrying " + e); + sleep(2000); + count++; + if (count == 50) { + assert(false, "errored out 50 times"); + } + continue; + } + break; + } + + // wait for a new master to be elected + sleep(5000); + var newMaster; + + print("\nsync1.js ********************************************************************** part 9"); + + for (var q = 0; q < 10; q++) { + // figure out who is master now + newMaster = replTest.getMaster(); + if (newMaster + "" != master + "") + break; + sleep(2000); + if (q > 6) print("sync1.js zzz...."); + } + + assert(newMaster + "" != master + "", "new master is " + newMaster + ", old master was " + master); + + print("\nsync1.js new master is " + newMaster + ", old master was " + master); + + print("\nsync1.js ********************************************************************** part 9.1"); + + count = 0; + countExceptions = 0; + do { + try { + max1 = dbs[1].bar.find().sort({ z: -1 }).limit(1).next(); + max2 = dbs[2].bar.find().sort({ z: -1 }).limit(1).next(); + } + catch (e) { + if (countExceptions++ > 300) { + print("dbs[1]:"); + try { + printjson(dbs[1].isMaster()); + printjson(dbs[1].bar.count()); + } + catch (e) { print(e); } + print("dbs[2]:"); + try { + printjson(dbs[2].isMaster()); + printjson(dbs[2].bar.count()); + } + catch (e) { print(e); } + assert(false, "sync1.js too many exceptions, failing"); + } + print("\nsync1.js: exception querying; will sleep and try again " + e); + sleep(3000); + continue; + } + + print("\nsync1.js waiting for match " + count + " " + Date() + " z[1]:" + max1.z + " z[2]:" + max2.z); + + // printjson(max1); + // printjson(max2); + + sleep(2000); + + count++; + if (count == 100) { + pause("fail phase 1"); + assert(false, "replsets/\nsync1.js fails timing out"); + replTest.stopSet(signal); + return; + } + } while (max1.z != max2.z); + + // okay, now they're caught up. 
We have a max: max1.z + + print("\nsync1.js ********************************************************************** part 10"); + + // now, let's see if rollback works + var result = dbs[0].getSisterDB("admin").runCommand({ replSetTest: 1, blind: false }); + dbs[0].getMongo().setSlaveOk(); + + printjson(result); + sleep(5000); + + // now this should resync + print("\nsync1.js ********************************************************************** part 11"); + var max0 = null; + count = 0; + do { + try { + max0 = dbs[0].bar.find().sort({ z: -1 }).limit(1).next(); + max1 = dbs[1].bar.find().sort({ z: -1 }).limit(1).next(); + } + catch (e) { + print("\nsync1.js part 11 exception on bar.find() will sleep and try again " + e); + sleep(2000); + continue; + } + + print("part 11"); + if (max0) { + print("max0.z:" + max0.z); + print("max1.z:" + max1.z); + } + + sleep(2000); + + count++; + if (count == 100) { + pause("FAIL part 11"); + assert(false, "replsets/\nsync1.js fails timing out"); + replTest.stopSet(signal); + return; + } + //print("||||| count:" + count); + //printjson(max0); + } while (!max0 || max0.z != max1.z); + + print("\nsync1.js ********************************************************************** part 12"); + pause("\nsync1.js success"); + replTest.stopSet(signal); +} + +if( 1 || debugging ) { + doTest( 15 ); +} diff -Nru mongodb-1.4.4/jstests/replsets/two_initsync.js mongodb-1.6.3/jstests/replsets/two_initsync.js --- mongodb-1.4.4/jstests/replsets/two_initsync.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/replsets/two_initsync.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,93 @@ +// test initial sync failing + +// try running as : +// +// mongo --nodb two_initsync.js | tee out | grep -v ^m31 +// + +var debugging = 0; + +function pause(s) { + print(s); + while (debugging) { + sleep(3000); + print(s); + } +} + +function deb(obj) { + if( debugging ) { + print("\n\n\n" + obj + "\n\n"); + } +} + +w = 0; + +function wait(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("twoinitsync waiting " + w); + if (++n == 4) { + print("" + f); + } + sleep(1000); + } +} + +doTest = function (signal) { + var replTest = new ReplSetTest({ name: 'testSet', nodes: 0 }); + + var first = replTest.add(); + + // Initiate replica set + assert.soon(function () { + var res = first.getDB("admin").runCommand({ replSetInitiate: null }); + return res['ok'] == 1; + }); + + // Get status + assert.soon(function () { + var result = first.getDB("admin").runCommand({ replSetGetStatus: true }); + return result['ok'] == 1; + }); + + var a = replTest.getMaster().getDB("two"); + for (var i = 0; i < 20000; i++) + a.coll.insert({ i: i, s: "a b" }); + + // Start a second node + var second = replTest.add(); + + // Add the second node. 
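+    // (the new member starts out empty, so it must initial-sync everything from the
+    // first member; the forceInitialSyncFailure command further down makes the first
+    // attempt fail so the retry path gets exercised)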
+ // This runs the equivalent of rs.add(newNode); + replTest.reInitiate(); + + var b = second.getDB("admin"); + + // attempt to interfere with the initial sync + b._adminCommand({ replSetTest: 1, forceInitialSyncFailure: 1 }); + + // wait(function () { return a._adminCommand("replSetGetStatus").members.length == 2; }); + + wait(function () { return b.isMaster().secondary || b.isMaster().ismaster; }); + + print("b.isMaster:"); + printjson(b.isMaster()); + + second.setSlaveOk(); + + print("b.isMaster:"); + printjson(b.isMaster()); + + wait(function () { var c = b.getSisterDB("two").coll.count(); print(c); return c == 20000; }); + + print("two_initsync.js SUCCESS"); + + replTest.stopSet(signal); +} + + +print("two_initsync.js"); +doTest( 15 ); diff -Nru mongodb-1.4.4/jstests/replsets/twosets.js mongodb-1.6.3/jstests/replsets/twosets.js --- mongodb-1.4.4/jstests/replsets/twosets.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/replsets/twosets.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,36 @@ +// add a node from a different set to the current set +// I don't know what should happen here. + +doTest = function( signal ) { + + var orig = new ReplSetTest( {name: 'testSet', nodes: 3} ); + orig.startSet(); + + var interloper = new ReplSetTest( {name: 'testSet', nodes: 3, startPort : 31003} ); + interloper.startSet(); + + sleep(5000); + + orig.initiate(); + interloper.initiate(); + + sleep(5000); + + var master = orig.getMaster(); + + var conf = master.getDB("local").system.replset.findOne(); + + var nodes = interloper.nodeList(); + var host = nodes[0]; + var id = conf.members.length; + conf.members.push({_id : id, host : host}); + conf.version++; + + var result = master.getDB("admin").runCommand({replSetReconfig : conf}); + + // now... stuff should blow up? + + sleep(10); +} + +doTest(15); \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/rs/rs_basic.js mongodb-1.6.3/jstests/rs/rs_basic.js --- mongodb-1.4.4/jstests/rs/rs_basic.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/rs/rs_basic.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,177 @@ +// rs_basic.js + +load("../../jstests/rs/test_framework.js"); + +function go() { + assert(__nextPort == 27000, "_nextPort==27000"); + + a = null; + try { + a = new Mongo("localhost:27000"); + print("using already open mongod on port 27000 -- presume you are debugging or something. 
should start empty."); + __nextPort++; + } + catch (e) { + a = rs_mongod(); + } + + b = rs_mongod(); + + x = a.getDB("admin"); + y = b.getDB("admin"); + memb = []; + memb[0] = x; + memb[1] = y; + + print("rs_basic.js go(): started 2 servers"); + + cfg = { _id: 'asdf', members: [] }; + var hn = hostname(); + cfg.members[0] = { _id: 0, host: hn + ":27000" }; + cfg.members[1] = { _id: 1, host: hn + ":27001" }; + + print("cfg=" + tojson(cfg)); +} + +function init(server) { + var i = server; + //i = Random.randInt(2); // a random member of the set + var m = memb[i]; + assert(!m.ismaster(), "not ismaster"); + var res = m.runCommand({ replSetInitiate: cfg }); + return res; +} + +_path = '../../db/Debug/'; +print("_path var set to " + _path); + +print("go() to run"); +print("init() to initiate"); + + +/* +var rt = new ReplTest( "basic1" ); + +m = rt.start( true ); +s = rt.start( false ); + +function block(){ + am.runCommand( { getlasterror : 1 , w : 2 , wtimeout : 3000 } ) +} + +am = m.getDB( "foo" ); +as = s.getDB( "foo" ); + +function check( note ){ + var start = new Date(); + var x,y; + while ( (new Date()).getTime() - start.getTime() < 30000 ){ + x = am.runCommand( "dbhash" ); + y = as.runCommand( "dbhash" ); + if ( x.md5 == y.md5 ) + return; + sleep( 200 ); + } + assert.eq( x.md5 , y.md5 , note ); +} + +am.a.save( { x : 1 } ); +check( "A" ); + +am.a.save( { x : 5 } ); + +am.a.update( {} , { $inc : { x : 1 } } ); +check( "B" ); + +am.a.update( {} , { $inc : { x : 1 } } , false , true ); +check( "C" ); + +// ----- check features ------- + +// map/reduce +am.mr.insert( { tags : [ "a" ] } ) +am.mr.insert( { tags : [ "a" , "b" ] } ) +am.getLastError(); +check( "mr setup" ); + +m = function(){ + for ( var i=0; i>>>>>>>>>>>>>> skipping " + x.name); return; @@ -22,3 +23,6 @@ ); +var runnerEnd = new Date() + +print( "total runner time: " + ( ( runnerEnd.getTime() - runnerStart.getTime() ) / 1000 ) + "secs" ) diff -Nru mongodb-1.4.4/jstests/sharding/addshard1.js mongodb-1.6.3/jstests/sharding/addshard1.js --- mongodb-1.4.4/jstests/sharding/addshard1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/addshard1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,56 @@ +s = new ShardingTest( "add_shard1", 1 ); + +assert.eq( 1, s.config.shards.count(), "initial server count wrong" ); + +// create a shard and add a database; if the database is not a duplicate, the mongod should be accepted +// as a shard +conn1 = startMongodTest( 29000 ); + +db1 = conn1.getDB( "testDB" ); +numObjs = 0; +for (i=0; i<3; i++){ + db1.foo.save( { a : i } ); + numObjs++; +} +db1.getLastError() + +newShard = "myShard"; +assert( s.admin.runCommand( { addshard: "localhost:29000" , name: newShard } ).ok, "did not accept non-duplicated shard" ); + +// a mongod with an existing database name should not be allowed to become a shard +conn2 = startMongodTest( 29001 ); +db2 = conn2.getDB( "otherDB" ); +db2.foo.save( {a:1} ); +db2.getLastError() +db3 = conn2.getDB( "testDB" ); +db3.foo.save( {a:1} ); +db3.getLastError() + +s.config.databases.find().forEach( printjson ) +rejectedShard = "rejectedShard"; +assert( ! 
s.admin.runCommand( { addshard: "localhost:29001" , name : rejectedShard } ).ok, "accepted mongod with duplicate db" ); + +// check that all collections that were local to the mongods are accessible through the mongos +sdb1 = s.getDB( "testDB" ); +assert.eq( numObjs , sdb1.foo.count() , "wrong count for database that existed before addshard" ); +sdb2 = s.getDB( "otherDB" ); +assert.eq( 0 , sdb2.foo.count() , "database of rejected shard appears through mongos" ); + +// make sure we can move a DB from the original mongod to a previously existing shard +assert.eq( s.normalize( s.config.databases.findOne( { _id : "testDB" } ).primary ), newShard , "DB primary is wrong" ); +origShard = s.getNonPrimaries( "testDB" )[0]; +s.adminCommand( { moveprimary : "testDB" , to : origShard } ); +assert.eq( s.normalize( s.config.databases.findOne( { _id : "testDB" } ).primary ), origShard , "DB primary didn't move" ); +assert.eq( numObjs , sdb1.foo.count() , "wrong count after moving database that existed before addshard" ); + +// make sure we can shard the original collections +sdb1.foo.ensureIndex( { a : 1 } ) // can't shard populated collection without an index +s.adminCommand( { enablesharding : "testDB" } ); +s.adminCommand( { shardcollection : "testDB.foo" , key: { a : 1 } } ); +s.adminCommand( { split : "testDB.foo", middle: { a : Math.floor(numObjs/2) } } ); +assert.eq( 2 , s.config.chunks.count(), "wrong chunk number after splitting collection that existed before" ); +assert.eq( numObjs , sdb1.foo.count() , "wrong count after splitting collection that existed before" ); + +stopMongod( 29000 ); +stopMongod( 29001 ); +s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/addshard2.js mongodb-1.6.3/jstests/sharding/addshard2.js --- mongodb-1.4.4/jstests/sharding/addshard2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/addshard2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,52 @@ + +// don't start any shards, yet +s = new ShardingTest( "add_shard2", 1, 0, 1, {useHostname : true} ); + +var conn1 = startMongodTest( 30001 , "add_shard21" , 0 , {useHostname : true} ); +var conn2 = startMongodTest( 30002 , "add_shard22" , 0 , {useHostname : true} ); + +var rs1 = new ReplSetTest( { "name" : "add_shard2_rs1", nodes : 3 , startPort : 31200 } ); +rs1.startSet(); +rs1.initiate(); +var master1 = rs1.getMaster(); + +var rs2 = new ReplSetTest( { "name" : "add_shard2_rs2", nodes : 3 , startPort : 31203 } ); +rs2.startSet(); +rs2.initiate(); +var master2 = rs2.getMaster(); + +// step 1. name given +assert(s.admin.runCommand({"addshard" : getHostName()+":30001", "name" : "bar"}).ok, "failed to add shard in step 1"); +var shard = s.getDB("config").shards.findOne({"_id" : {"$nin" : ["shard0000"]}}); +assert(shard, "shard wasn't found"); +assert.eq("bar", shard._id, "shard has incorrect name"); + +// step 2. replica set +assert(s.admin.runCommand({"addshard" : "add_shard2_rs1/"+getHostName()+":31200"}).ok, "failed to add shard in step 2"); +shard = s.getDB("config").shards.findOne({"_id" : {"$nin" : ["shard0000", "bar"]}}); +assert(shard, "shard wasn't found"); +assert.eq("add_shard2_rs1", shard._id, "t2 name"); + +// step 3. replica set w/ name given +assert(s.admin.runCommand({"addshard" : "add_shard2_rs2/"+getHostName()+":31203", "name" : "myshard"}).ok, + "failed to add shard in step 3"); +shard = s.getDB("config").shards.findOne({"_id" : {"$nin" : ["shard0000", "bar", "add_shard2_rs1"]}}); +assert(shard, "shard wasn't found"); +assert.eq("myshard", shard._id, "t3 name"); + +// step 4. 
no name given +assert(s.admin.runCommand({"addshard" : getHostName()+":30002"}).ok, "failed to add shard in step 4"); +shard = s.getDB("config").shards.findOne({"_id" : {"$nin" : ["shard0000", "bar", "add_shard2_rs1", "myshard"]}}); +assert(shard, "shard wasn't found"); +assert.eq("shard0001", shard._id, "t4 name"); + +assert.eq(s.getDB("config").shards.count(), 5, "unexpected number of shards"); + +// step 5. replica set w/ a wrong host +assert(!s.admin.runCommand({"addshard" : "add_shard2_rs2/NonExistingHost:31203"}).ok, "accepted bad hostname in step 5"); + +// step 6. replica set w/ mixed wrong/right hosts +assert(!s.admin.runCommand({"addshard" : "add_shard2_rs2/"+getHostName()+":31203,foo:9999"}).ok, + "accepted bad hostname in step 6"); + +s.stop(); \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/sharding/auto1.js mongodb-1.6.3/jstests/sharding/auto1.js --- mongodb-1.4.4/jstests/sharding/auto1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/auto1.js 2010-09-24 10:02:42.000000000 -0700 @@ -14,38 +14,50 @@ var i=0; -for ( ; i<500; i++ ){ +for ( ; i<100; i++ ){ coll.save( { num : i , s : bigString } ); } - -s.adminCommand( "connpoolsync" ); +db.getLastError(); primary = s.getServer( "test" ).getDB( "test" ); -assert.eq( 1 , s.config.chunks.count() ); -assert.eq( 500 , primary.foo.count() ); +counts = [] + +s.printChunks(); +counts.push( s.config.chunks.count() ); +assert.eq( 100 , primary.foo.count() ); print( "datasize: " + tojson( s.getServer( "test" ).getDB( "admin" ).runCommand( { datasize : "test.foo" } ) ) ); -for ( ; i<800; i++ ){ +for ( ; i<200; i++ ){ coll.save( { num : i , s : bigString } ); } -assert.eq( 1 , s.config.chunks.count() ); +s.printChunks() +counts.push( s.config.chunks.count() ); -for ( ; i<1500; i++ ){ +for ( ; i<400; i++ ){ coll.save( { num : i , s : bigString } ); } -assert.eq( 3 , s.config.chunks.count() , "shard didn't split A " ); s.printChunks(); +counts.push( s.config.chunks.count() ); -for ( ; i<3000; i++ ){ +for ( ; i<700; i++ ){ coll.save( { num : i , s : bigString } ); } +db.getLastError(); -assert.eq( 4 , s.config.chunks.count() , "shard didn't split B " ); s.printChunks(); +counts.push( s.config.chunks.count() ); + +assert( counts[counts.length-1] > counts[0] , "counts 1 : " + tojson( counts ) ) +sorted = counts.slice(0) +sorted.sort(); +assert.eq( counts , sorted , "counts 2 : " + tojson( counts ) ) + +print( counts ) +printjson( db.stats() ) s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/auto2.js mongodb-1.6.3/jstests/sharding/auto2.js --- mongodb-1.4.4/jstests/sharding/auto2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/auto2.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,6 +1,6 @@ // auto2.js -s = new ShardingTest( "auto2" , 2 , 1 , 1 ); +s = new ShardingTest( "auto2" , 2 , 5 , 2 ); s.adminCommand( { enablesharding : "test" } ); s.adminCommand( { shardcollection : "test.foo" , key : { num : 1 } } ); @@ -26,19 +26,116 @@ ) ); } +assert.eq( i , j * 100 , "setup" ); s.adminCommand( "connpoolsync" ); +db.getLastError(); print( "done inserting data" ); print( "datasize: " + tojson( s.getServer( "test" ).getDB( "admin" ).runCommand( { datasize : "test.foo" } ) ) ); s.printChunks(); -counta = s._connections[0].getDB( "test" ).foo.count(); -countb = s._connections[1].getDB( "test" ).foo.count(); +function doCountsGlobal(){ + counta = s._connections[0].getDB( "test" ).foo.count(); + countb = s._connections[1].getDB( "test" ).foo.count(); + return counta + countb; +} + +doCountsGlobal() + 
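+// doCountsGlobal() assigns to the globals counta/countb (no 'var'), which is what lets
+// the assertions below see them; a hypothetical debug aid while reading this test:
+printjson( { counta : counta , countb : countb } )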
+assert( counta > 0 , "diff1" ); +assert( countb > 0 , "diff2" ); + +print( "checkpoint B" ) + +var missing = []; + +for ( i=0; i 0 ) + s += ","; + s += o.name; + } + return s; +} + +assert.eq( "allan,bob,eliot,joe,mark,sara" , nameString( db.foo.find().sort( { name : 1 } ) ) , "sort 1" ); +assert.eq( "sara,mark,joe,eliot,bob,allan" , nameString( db.foo.find().sort( { name : -1 } ) ) , "sort 2" ); + +assert.eq( 2 , db.foo.find().limit(2).itcount() , "LS1" ) +assert.eq( 2 , db.foo.find().skip(2).limit(2).itcount() , "LS2" ) +assert.eq( 1 , db.foo.find().skip(5).limit(2).itcount() , "LS3" ) +assert.eq( 6 , db.foo.find().limit(2).count() , "LSC1" ) +assert.eq( 2 , db.foo.find().limit(2).size() , "LSC2" ) +assert.eq( 2 , db.foo.find().skip(2).limit(2).size() , "LSC3" ) +assert.eq( 1 , db.foo.find().skip(5).limit(2).size() , "LSC4" ) + +assert.eq( "allan,bob" , nameString( db.foo.find().sort( { name : 1 } ).limit(2) ) , "LSD1" ) +assert.eq( "bob,eliot" , nameString( db.foo.find().sort( { name : 1 } ).skip(1).limit(2) ) , "LSD2" ) +assert.eq( "joe,mark" , nameString( db.foo.find().sort( { name : 1 } ).skip(3).limit(2) ) , "LSD3" ) + +assert.eq( "eliot,sara" , nameString( db.foo.find().sort( { _id : 1 } ).limit(2) ) , "LSE1" ) +assert.eq( "sara,bob" , nameString( db.foo.find().sort( { _id : 1 } ).skip(1).limit(2) ) , "LSE2" ) +assert.eq( "joe,mark" , nameString( db.foo.find().sort( { _id : 1 } ).skip(3).limit(2) ) , "LSE3" ) + +for ( i=0; i<10; i++ ){ + db.foo.save( { _id : 7 + i , name : "zzz" + i } ) +} + +assert.eq( 10 , db.foo.find( { name : { $gt : "z" } } ).itcount() , "LSF1" ) +assert.eq( 10 , db.foo.find( { name : { $gt : "z" } } ).sort( { _id : 1 } ).itcount() , "LSF2" ) +assert.eq( 5 , db.foo.find( { name : { $gt : "z" } } ).sort( { _id : 1 } ).skip(5).itcount() , "LSF3" ) +sleep( 5000 ) +assert.eq( 3 , db.foo.find( { name : { $gt : "z" } } ).sort( { _id : 1 } ).skip(5).limit(3).itcount() , "LSF4" ) s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/count2.js mongodb-1.6.3/jstests/sharding/count2.js --- mongodb-1.4.4/jstests/sharding/count2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/count2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,43 @@ +// count2.js + +s1 = new ShardingTest( "count2" , 2 , 1 , 2 ); +s2 = s1._mongos[1]; + +s1.adminCommand( { enablesharding: "test" } ); +s1.adminCommand( { shardcollection: "test.foo" , key : { name : 1 } } ); + +db1 = s1.getDB( "test" ).foo; +db2 = s2.getDB( "test" ).foo; + +assert.eq( 1, s1.config.chunks.count(), "sanity check A"); + +db1.save( { name : "aaa" } ) +db1.save( { name : "bbb" } ) +db1.save( { name : "ccc" } ) +db1.save( { name : "ddd" } ) +db1.save( { name : "eee" } ) +db1.save( { name : "fff" } ) + +s1.adminCommand( { split : "test.foo" , middle : { name : "ddd" } } ); + +assert.eq( 3, db1.count( { name : { $gte: "aaa" , $lt: "ddd" } } ) , "initial count mongos1" ); +assert.eq( 3, db2.count( { name : { $gte: "aaa" , $lt: "ddd" } } ) , "initial count mongos2" ); + +s1.printChunks( "test.foo" ) + +s1.adminCommand( { movechunk : "test.foo" , find : { name : "aaa" } , to : s1.getOther( s1.getServer( "test" ) ).name } ); + +assert.eq( 3, db1.count( { name : { $gte: "aaa" , $lt: "ddd" } } ) , "post count mongos1" ); + +// The second mongos still thinks its shard mapping is valid and accepts a count +print( "before sleep: " + Date() ) +sleep( 2000 ) +print( "after sleep: " + Date() ) +s1.printChunks( "test.foo" ) +assert.eq( 3, db2.find( { name : { $gte: "aaa" , $lt: "ddd" } } ).count() , "post count 
mongos2" ); + +db2.findOne(); + +assert.eq( 3, db2.count( { name : { $gte: "aaa" , $lt: "ddd" } } ) ); + +s1.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/cursor1.js mongodb-1.6.3/jstests/sharding/cursor1.js --- mongodb-1.4.4/jstests/sharding/cursor1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/cursor1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,60 @@ +// cursor1.js +// checks that cursors survive a chunk's move + +s = new ShardingTest( "sharding_cursor1" , 2 , 2 ) + +// take the balancer out of the equation +s.config.settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true ); +s.config.settings.find().forEach( printjson ) + +// create a sharded 'test.foo', for the moment with just one chunk +s.adminCommand( { enablesharding: "test" } ); +s.adminCommand( { shardcollection: "test.foo", key: { _id: 1 } } ) + +db = s.getDB( "test" ); +primary = s.getServer( "test" ).getDB( "test" ); +secondary = s.getOther( primary ).getDB( "test" ); + +numObjs = 10; +for (i=0; i < numObjs; i++){ + db.foo.insert({_id: i}); +} +db.getLastError(); +assert.eq( 1, s.config.chunks.count() , "test requires collection to have one chunk initially" ); + +// we'll split the collection in two and move the second chunk while three cursors are open +// cursor1 still has more data in the first chunk, the one that didn't move +// cursor2 buffered the last obj of the first chunk +// cursor3 buffered data that was moved on the second chunk +var cursor1 = db.foo.find().batchSize( 3 ); +assert.eq( 3 , cursor1.objsLeftInBatch() ); +var cursor2 = db.foo.find().batchSize( 5 ); +assert.eq( 5 , cursor2.objsLeftInBatch() ); +var cursor3 = db.foo.find().batchSize( 7 ); +assert.eq( 7 , cursor3.objsLeftInBatch() ); + +s.adminCommand( { split: "test.foo" , middle : { _id : 5 } } ); +s.adminCommand( { movechunk : "test.foo" , find : { _id : 5 } , to : secondary.getMongo().name } ); +assert.eq( 2, s.config.chunks.count() ); + +// the cursors should not have been affected +assert.eq( numObjs , cursor1.itcount() , "c1" ); +assert.eq( numObjs , cursor2.itcount() , "c2" ); +assert.eq( numObjs , cursor3.itcount() , "c3" ); + +// test timeout +gc(); gc(); +cur = db.foo.find().batchSize( 2 ) +assert( cur.next() , "T1" ) +assert( cur.next() , "T2" ); +before = db.runCommand( { "cursorInfo" : 1 , "setTimeout" : 10000 } ) // 10 seconds +printjson( before ) +sleep( 6000 ) +assert( cur.next() , "T3" ) +assert( cur.next() , "T4" ); +sleep( 22000 ) +assert.throws( function(){ cur.next(); } , "T5" ) +after = db.runCommand( { "cursorInfo" : 1 , "setTimeout" : 10000 } ) // 10 seconds +gc(); gc() + +s.stop() diff -Nru mongodb-1.4.4/jstests/sharding/diffservers1.js mongodb-1.6.3/jstests/sharding/diffservers1.js --- mongodb-1.4.4/jstests/sharding/diffservers1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/diffservers1.js 2010-09-24 10:02:42.000000000 -0700 @@ -14,7 +14,7 @@ assert( ! s.admin.runCommand( { addshard: "sdd$%" } ).ok , "bad hostname" ); assert( ! s.admin.runCommand( { addshard: "127.0.0.1:43415" } ).ok , "host not up" ); -assert( ! s.admin.runCommand( { addshard: "127.0.0.1:43415" , allowLocal : true } ).ok , "host not up" ); +assert( ! 
s.admin.runCommand( { addshard: "10.0.0.1:43415" } ).ok , "allowed shard in IP when config is localhost" ); s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/error1.js mongodb-1.6.3/jstests/sharding/error1.js --- mongodb-1.4.4/jstests/sharding/error1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/error1.js 2010-09-24 10:02:42.000000000 -0700 @@ -18,19 +18,21 @@ s.adminCommand( { shardcollection : "test.foo2" , key : { num : 1 } } ); -db.foo2.insert( { _id : 1 , num : 5 } ); -db.foo2.insert( { _id : 2 , num : 10 } ); -db.foo2.insert( { _id : 3 , num : 15 } ); -db.foo2.insert( { _id : 4 , num : 20 } ); +db.foo2.save( { _id : 1 , num : 5 } ); +db.foo2.save( { _id : 2 , num : 10 } ); +db.foo2.save( { _id : 3 , num : 15 } ); +db.foo2.save( { _id : 4 , num : 20 } ); s.adminCommand( { split : "test.foo2" , middle : { num : 10 } } ); s.adminCommand( { movechunk : "test.foo2" , find : { num : 20 } , to : s.getOther( s.getServer( "test" ) ).name } ); +print( "a: " + a.foo2.count() ); +print( "b: " + b.foo2.count() ); assert( a.foo2.count() > 0 && a.foo2.count() < 4 , "se1" ); assert( b.foo2.count() > 0 && b.foo2.count() < 4 , "se2" ); assert.eq( 4 , db.foo2.count() , "se3" ); -db.foo2.insert( { _id : 5 , num : 25 } ); +db.foo2.save( { _id : 5 , num : 25 } ); assert( ! db.getLastError() , "se3.5" ); s.sync(); assert.eq( 5 , db.foo2.count() , "se4" ); @@ -43,5 +45,17 @@ assert.eq( 5 , db.foo2.count() , "se5" ); + +// assert in mongos +s.adminCommand( { shardcollection : "test.foo3" , key : { num : 1 } } ); +assert.isnull(db.getLastError() , "gle C1" ); + +db.foo3.insert({}); //this fails with no shard key error +assert(db.getLastError() , "gle C2a" ); +assert(db.getLastError() , "gle C2b" ); + +db.foo3.insert({num:1}); +assert.isnull(db.getLastError() , "gle C3a" ); + // ---- s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/features1.js mongodb-1.6.3/jstests/sharding/features1.js --- mongodb-1.4.4/jstests/sharding/features1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/features1.js 2010-09-24 10:02:42.000000000 -0700 @@ -50,10 +50,15 @@ assert.eq( 4 , a.foo.getIndexKeys().length , "a index 3" ); assert.eq( 4 , b.foo.getIndexKeys().length , "b index 3" ); +db.foo.ensureIndex( { num : 1 , bar : 1 } , true ); +s.sync(); +assert.eq( 5 , b.foo.getIndexKeys().length , "c index 3" ); + // ---- can't shard thing with unique indexes db.foo2.ensureIndex( { a : 1 } ); s.sync(); +printjson( db.system.indexes.find( { ns : "test.foo2" } ).toArray() ); assert( s.admin.runCommand( { shardcollection : "test.foo2" , key : { num : 1 } } ).ok , "shard with index" ); db.foo3.ensureIndex( { a : 1 } , true ); @@ -61,6 +66,12 @@ printjson( db.system.indexes.find( { ns : "test.foo3" } ).toArray() ); assert( ! 
s.admin.runCommand( { shardcollection : "test.foo3" , key : { num : 1 } } ).ok , "shard with unique index" ); +db.foo7.ensureIndex( { num : 1 , a : 1 } , true ); +s.sync(); +printjson( db.system.indexes.find( { ns : "test.foo7" } ).toArray() ); +assert( s.admin.runCommand( { shardcollection : "test.foo7" , key : { num : 1 } } ).ok , "shard with ok unique index" ); + + // ----- eval ----- db.foo2.save( { num : 5 , a : 7 } ); @@ -83,6 +94,7 @@ s.adminCommand( { movechunk : "test.foo4" , find : { num : 20 } , to : s.getOther( s.getServer( "test" ) ).name } ); db.foo4.save( { num : 5 } ); db.foo4.save( { num : 15 } ); +db.getLastError(); s.sync(); assert.eq( 1 , a.foo4.count() , "ua1" ); assert.eq( 1 , b.foo4.count() , "ub1" ); @@ -120,13 +132,15 @@ db.foo6.save( { a : 1 } ); db.foo6.save( { a : 3 } ); db.foo6.save( { a : 3 } ); +db.foo6.ensureIndex( { a : 1 } ); s.sync(); +printjson( db.system.indexes.find( { ns : "test.foo6" } ).toArray() ); assert.eq( 2 , db.foo6.group( { key : { a : 1 } , initial : { count : 0 } , reduce : function(z,prev){ prev.count++; } } ).length ); assert.eq( 3 , db.foo6.find().count() ); -assert( s.admin.runCommand( { shardcollection : "test.foo6" , key : { a : 2 } } ).ok ); +assert( s.admin.runCommand( { shardcollection : "test.foo6" , key : { a : 1 } } ).ok ); assert.eq( 3 , db.foo6.find().count() ); s.adminCommand( { split : "test.foo6" , middle : { a : 2 } } ); @@ -135,5 +149,16 @@ assert.throws( function(){ db.foo6.group( { key : { a : 1 } , initial : { count : 0 } , reduce : function(z,prev){ prev.count++; } } ); } );; +// ---- can't shard non-empty collection without index ----- + +db.foo8.save( { a : 1 } ); +assert( ! s.admin.runCommand( { shardcollection : "test.foo8" , key : { a : 1 } } ).ok , "non-empty collection" ); + +// --- listDatabases --- + +r = db.getMongo().getDBs() +assert.eq( 4 , r.databases.length , "listDatabases 1 : " + tojson( r ) ) +assert.lt( 10000 , r.totalSize , "listDatabases 2 : " + tojson( r ) ); + s.stop() diff -Nru mongodb-1.4.4/jstests/sharding/features2.js mongodb-1.6.3/jstests/sharding/features2.js --- mongodb-1.4.4/jstests/sharding/features2.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/features2.js 2010-09-24 10:02:42.000000000 -0700 @@ -13,6 +13,7 @@ db.foo.save( { x : 1 } ); db.foo.save( { x : 2 } ); db.foo.save( { x : 3 } ); +db.foo.ensureIndex( { x : 1 } ); assert.eq( "1,2,3" , db.foo.distinct( "x" ) , "distinct 1" ); assert( a.foo.distinct("x").length == 3 || b.foo.distinct("x").length == 3 , "distinct 2" ); @@ -51,25 +52,27 @@ // --- _id key --- -db.foo2.insert( { _id : new ObjectId() } ); -db.foo2.insert( { _id : new ObjectId() } ); -db.foo2.insert( { _id : new ObjectId() } ); +db.foo2.save( { _id : new ObjectId() } ); +db.foo2.save( { _id : new ObjectId() } ); +db.foo2.save( { _id : new ObjectId() } ); +db.getLastError(); assert.eq( 1 , s.onNumShards( "foo2" ) , "F1" ); +printjson( db.system.indexes.find( { ns : "test.foo2" } ).toArray() ); s.adminCommand( { shardcollection : "test.foo2" , key : { _id : 1 } } ); assert.eq( 3 , db.foo2.count() , "F2" ) db.foo2.insert( {} ); assert.eq( 4 , db.foo2.count() , "F3" ) - // --- map/reduce db.mr.save( { x : 1 , tags : [ "a" , "b" ] } ); db.mr.save( { x : 2 , tags : [ "b" , "c" ] } ); db.mr.save( { x : 3 , tags : [ "c" , "a" ] } ); db.mr.save( { x : 4 , tags : [ "b" , "c" ] } ); +db.mr.ensureIndex( { x : 1 } ); m = function(){ this.tags.forEach( @@ -88,8 +91,12 @@ }; doMR = function( n ){ + print(n); + var res = db.mr.mapReduce( m , r ); printjson( 
res ); + assert.eq( new NumberLong(4) , res.counts.input , "MR T0 " + n ); + var x = db[res.result]; assert.eq( 3 , x.find().count() , "MR T1 " + n ); @@ -111,4 +118,42 @@ doMR( "after" ); +s.adminCommand({split:'test.mr' , middle:{x:3}} ); +s.adminCommand({split:'test.mr' , middle:{x:4}} ); +s.adminCommand({movechunk:'test.mr', find:{x:3}, to: s.getServer('test').name } ); + +doMR( "after extra split" ); + +cmd = { mapreduce : "mr" , map : "emit( " , reduce : "fooz + " }; + +x = db.runCommand( cmd ); +y = s._connections[0].getDB( "test" ).runCommand( cmd ); + +printjson( x ) +printjson( y ) + +// count + +db.countaa.save({"regex" : /foo/i}) +db.countaa.save({"regex" : /foo/i}) +db.countaa.save({"regex" : /foo/i}) +assert.eq( 3 , db.countaa.count() , "counta1" ); +assert.eq( 3 , db.countaa.find().itcount() , "counta1" ); + +x = null; y = null; +try { + x = db.runCommand( "forceerror" ) +} +catch ( e ){ + x = e; +} +try { + y = s._connections[0].getDB( "test" ).runCommand( "forceerror" ); +} +catch ( e ){ + y = e; +} + +assert.eq( x , y , "assert format" ) + s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/features3.js mongodb-1.6.3/jstests/sharding/features3.js --- mongodb-1.4.4/jstests/sharding/features3.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/features3.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,86 @@ + +s = new ShardingTest( "features3" , 2 , 1 , 1 ); +s.adminCommand( { enablesharding : "test" } ); + +a = s._connections[0].getDB( "test" ); +b = s._connections[1].getDB( "test" ); + +db = s.getDB( "test" ); + +// ---------- load some data ----- + +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); +N = 10000; +s.adminCommand( { split : "test.foo" , middle : { _id : N/2 } } ) +s.adminCommand( { moveChunk : "test.foo", find : { _id : 3 } ,to : s.getNonPrimaries( "test" )[0] } ) + +for ( i=0; i= 0 , "fsync not on admin should fail : " + tojson( x ) ) + +x = db._adminCommand( "fsync" ) +assert( x.ok == 1 && x.numFiles > 0 , "fsync failed : " + tojson( x ) ) + +x = db._adminCommand( { "fsync" :1, lock:true } ) +assert( ! 
x.ok , "lock should fail: " + tojson( x ) ) + +s.stop() diff -Nru mongodb-1.4.4/jstests/sharding/findandmodify1.js mongodb-1.6.3/jstests/sharding/findandmodify1.js --- mongodb-1.4.4/jstests/sharding/findandmodify1.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/findandmodify1.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,53 +1,51 @@ -s = new ShardingTest( "find_and_modify_sharded" , 2 ); +s = new ShardingTest( "find_and_modify_sharded" , 2 , 2); s.adminCommand( { enablesharding : "test" } ); db = s.getDB( "test" ); primary = s.getServer( "test" ).getDB( "test" ); -seconday = s.getOther( primary ).getDB( "test" ); +secondary = s.getOther( primary ).getDB( "test" ); numObjs = 20; s.adminCommand( { shardcollection : "test.stuff" , key : {_id:1} } ); +// pre-split the collection so to avoid interference from balancer +s.adminCommand( { split: "test.stuff" , middle : { _id : numObjs/2 } } ); +s.adminCommand( { movechunk : "test.stuff" , find : { _id : numObjs/2 } , to : secondary.getMongo().name } ) ; + for (var i=0; i < numObjs; i++){ db.stuff.insert({_id: i}); } +db.getLastError() -for (var i=0; i < numObjs; i+=2){ +// put two docs in each chunk (avoid the split in 0, since there are no docs less than 0) +for (var i=2; i < numObjs; i+=2){ + if (i == numObjs/2) + continue; s.adminCommand( { split: "test.stuff" , middle : {_id: i} } ); } -for (var i=0; i < numObjs; i+=4){ - s.adminCommand( { movechunk : "test.stuff" , find : {_id: i} , to : seconday.getMongo().name } ); -} - -//sorted update -for (var i=0; i < numObjs; i++){ - assert.eq(db.stuff.count({a:1}), i, "1 A"); - - var out = db.stuff.findAndModify({query: {a:null}, update: {$set: {a:1}}, sort: {_id:1}}); - - assert.eq(db.stuff.count({a:1}), i+1, "1 B"); - assert.eq(db.stuff.findOne({_id:i}).a, 1, "1 C"); - assert.eq(out._id, i, "1 D"); -} +s.printChunks(); +assert.eq( numObjs/2, s.config.chunks.count(), "split failed" ); +assert.eq( numObjs/4, s.config.chunks.count({ shard: "shard0000" }) ); +assert.eq( numObjs/4, s.config.chunks.count({ shard: "shard0001" }) ); -// unsorted update +// update for (var i=0; i < numObjs; i++){ assert.eq(db.stuff.count({b:1}), i, "2 A"); - var out = db.stuff.findAndModify({query: {b:null}, update: {$set: {b:1}}}); + var out = db.stuff.findAndModify({query: {_id:i, b:null}, update: {$set: {b:1}}}); + assert.eq(out._id, i, "2 E"); assert.eq(db.stuff.count({b:1}), i+1, "2 B"); - assert.eq(db.stuff.findOne({_id:out._id}).a, 1, "2 C"); } -//sorted remove (no query) +// remove for (var i=0; i < numObjs; i++){ assert.eq(db.stuff.count(), numObjs - i, "3 A"); assert.eq(db.stuff.count({_id: i}), 1, "3 B"); - var out = db.stuff.findAndModify({remove: true, sort: {_id:1}}); + var out = db.stuff.findAndModify({remove: true, query: {_id:i}}); assert.eq(db.stuff.count(), numObjs - i - 1, "3 C"); assert.eq(db.stuff.count({_id: i}), 0, "3 D"); diff -Nru mongodb-1.4.4/jstests/sharding/key_many.js mongodb-1.6.3/jstests/sharding/key_many.js --- mongodb-1.4.4/jstests/sharding/key_many.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/key_many.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,14 +1,18 @@ // key_many.js // values have to be sorted +// you must have exactly 6 values in each array types = [ { name : "string" , values : [ "allan" , "bob" , "eliot" , "joe" , "mark" , "sara" ] , keyfield: "k" } , { name : "double" , values : [ 1.2 , 3.5 , 4.5 , 4.6 , 6.7 , 9.9 ] , keyfield : "a" } , { name : "date" , values : [ new Date( 1000000 ) , new Date( 2000000 ) , new Date( 3000000 
) , new Date( 4000000 ) , new Date( 5000000 ) , new Date( 6000000 ) ] , keyfield : "a" } , { name : "string_id" , values : [ "allan" , "bob" , "eliot" , "joe" , "mark" , "sara" ] , keyfield : "_id" }, - { name : "embedded" , values : [ "allan" , "bob" , "eliot" , "joe" , "mark" , "sara" ] , keyfield : "a.b" } , + { name : "embedded 1" , values : [ "allan" , "bob" , "eliot" , "joe" , "mark" , "sara" ] , keyfield : "a.b" } , { name : "embedded 2" , values : [ "allan" , "bob" , "eliot" , "joe" , "mark" , "sara" ] , keyfield : "a.b.c" } , { name : "object" , values : [ {a:1, b:1.2}, {a:1, b:3.5}, {a:1, b:4.5}, {a:2, b:1.2}, {a:2, b:3.5}, {a:2, b:4.5} ] , keyfield : "o" } , + { name : "compound" , values : [ {a:1, b:1.2}, {a:1, b:3.5}, {a:1, b:4.5}, {a:2, b:1.2}, {a:2, b:3.5}, {a:2, b:4.5} ] , keyfield : "o" , compound : true } , + { name : "oid_id" , values : [ ObjectId() , ObjectId() , ObjectId() , ObjectId() , ObjectId() , ObjectId() ] , keyfield : "_id" } , + { name : "oid_other" , values : [ ObjectId() , ObjectId() , ObjectId() , ObjectId() , ObjectId() , ObjectId() ] , keyfield : "o" } , ] s = new ShardingTest( "key_many" , 2 ); @@ -20,7 +24,18 @@ function makeObjectDotted( v ){ var o = {}; - o[curT.keyfield] = v; + if (curT.compound){ + var prefix = curT.keyfield + '.'; + if (typeof(v) == 'object'){ + for (key in v) + o[prefix + key] = v[key]; + } else { + for (key in curT.values[0]) + o[prefix + key] = v; + } + } else { + o[curT.keyfield] = v; + } return o; } @@ -39,6 +54,15 @@ return o; } +function makeInQuery(){ + if (curT.compound){ + // cheating a bit... + return {'o.a': {$in: [1,2]}}; + } else { + return makeObjectDotted({$in: curT.values}); + } +} + function getKey( o ){ var keys = curT.keyfield.split('.'); for(var i=0; i 0 ) + s += ","; + s += a[i]; + } + return s; +} + +forward = terse(forward); +backward = terse(backward); + +assert.eq( forward , getSorted( "num" , "num" , 1 ) , "D1" ) +assert.eq( backward , getSorted( "num" , "num" , -1 ) , "D2" ) + +assert.eq( backward , getSorted( "x" , "num" , 1 ) , "D3" ) +assert.eq( forward , getSorted( "x" , "num" , -1 ) , "D4" ) + +assert.eq( backward , getSorted( "x" , "num" , 1 , { num : 1 } ) , "D5" ) +assert.eq( forward , getSorted( "x" , "num" , -1 , { num : 1 } ) , "D6" ) + + +s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/splitpick.js mongodb-1.6.3/jstests/sharding/splitpick.js --- mongodb-1.4.4/jstests/sharding/splitpick.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/jstests/sharding/splitpick.js 2010-09-24 10:02:42.000000000 -0700 @@ -17,17 +17,23 @@ c.save( { a : i } ); } c.save( { a : 99 } ); +db.getLastError(); -assert.eq( s.admin.runCommand( { splitvalue : "test.foo" , find : { a : 1 } } ).middle.a , 1 , "splitvalue 1" ); -assert.eq( s.admin.runCommand( { splitvalue : "test.foo" , find : { a : 3 } } ).middle.a , 1 , "splitvalue 2" ); +function checkSplit( f, want , num ){ + x = s.admin.runCommand( { splitvalue : "test.foo" , find : { a : f } } ); + assert.eq( want, x.middle ? 
x.middle.a : null , "splitvalue " + num + " " + tojson( x ) ); +} + +checkSplit( 1 , 1 , "1" ) +checkSplit( 3 , 1 , "2" ) s.adminCommand( { split : "test.foo" , find : { a : 1 } } ); -assert.eq( s.admin.runCommand( { splitvalue : "test.foo" , find : { a : 3 } } ).middle.a , 99 , "splitvalue 3" ); +checkSplit( 3 , 99 , "3" ) s.adminCommand( { split : "test.foo" , find : { a : 99 } } ); assert.eq( s.config.chunks.count() , 3 ); s.printChunks(); -assert.eq( s.admin.runCommand( { splitvalue : "test.foo" , find : { a : 50 } } ).middle.a , 10 , "splitvalue 4 " ); +checkSplit( 50 , 10 , "4" ) s.stop(); diff -Nru mongodb-1.4.4/jstests/sharding/stats.js mongodb-1.6.3/jstests/sharding/stats.js --- mongodb-1.4.4/jstests/sharding/stats.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/sharding/stats.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,60 @@ +s = new ShardingTest( "stats" , 2 , 1 , 1 ); +s.adminCommand( { enablesharding : "test" } ); + +a = s._connections[0].getDB( "test" ); +b = s._connections[1].getDB( "test" ); + +db = s.getDB( "test" ); + +function numKeys(o){ + var num = 0; + for (var x in o) + num++; + return num; +} + +// ---------- load some data ----- + +// need collections sharded before and after main collection for proper test +s.adminCommand( { shardcollection : "test.aaa" , key : { _id : 1 } } ); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); // this collection is actually used +s.adminCommand( { shardcollection : "test.zzz" , key : { _id : 1 } } ); + + +N = 10000; +s.adminCommand( { split : "test.foo" , middle : { _id : N/2 } } ) +s.adminCommand( { moveChunk : "test.foo", find : { _id : 3 } ,to : s.getNonPrimaries( "test" )[0] } ) + +for ( i=0; i 0 , "blah 1" ); -assert( s._connections[1].getDB( "test" ).foo.find().toArray().length > 0 , "blah 2" ); +assert( s._connections[0].getDB( "test" ).foo.find().toArray().length > 0 , "shard 0 request" ); +assert( s._connections[1].getDB( "test" ).foo.find().toArray().length > 0 , "shard 1 request" ); assert.eq( 7 , s._connections[0].getDB( "test" ).foo.find().toArray().length + - s._connections[1].getDB( "test" ).foo.find().toArray().length , "blah 3" ); + s._connections[1].getDB( "test" ).foo.find().toArray().length , "combined shards" ); assert.eq( 7 , s.getDB( "test" ).foo.find().toArray().length , "normal B" ); assert.eq( 7 , s2.getDB( "test" ).foo.find().toArray().length , "other B" ); @@ -45,4 +45,54 @@ assert.eq( 7 , s2.getDB( "test" ).foo.find().toArray().length , "other B " + i ); } +assert.eq( 0 , s.config.big.find().itcount() , "C1" ); +for ( i=0; i<50; i++ ){ + s.config.big.insert( { _id : i } ); +} +s.config.getLastError(); +assert.eq( 50 , s.config.big.find().itcount() , "C2" ); +assert.eq( 50 , s.config.big.find().count() , "C3" ); +assert.eq( 50 , s.config.big.find().batchSize(5).itcount() , "C4" ); + + +hashes = [] + +for ( i=0; i<3; i++ ){ + print( i ); + s._connections[i].getDB( "config" ).chunks.find( {} , { lastmod : 1 } ).forEach( printjsononeline ); + hashes[i] = s._connections[i].getDB( "config" ).runCommand( "dbhash" ); +} + +printjson( hashes ); + +for ( i=1; i 10, "D" ); + +assert( t.findOne( { i : i - 1 } ), "E" ); +t.remove( { i : i - 1 } ); +assert( db.getLastError().indexOf( "capped" ) >= 0, "F" ); + +assert( t.validate().valid, "G" ); + +/* there is a cursor open here, so this is a convenient place for a quick cursor test. 
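   (closeAllDatabases below is issued while that cursor is still open, so the call also exercises cursor cleanup on database close; the serverStatus cursor-count assertion that follows is left commented out.)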
*/ + +db._adminCommand("closeAllDatabases"); + +//assert( db.serverStatus().cursors.totalOpen == 0, "cursors open and shouldn't be"); diff -Nru mongodb-1.4.4/jstests/slowNightly/cursor8.js mongodb-1.6.3/jstests/slowNightly/cursor8.js --- mongodb-1.4.4/jstests/slowNightly/cursor8.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/cursor8.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,23 @@ +// This should get skipped when testing replication. + +t = db.cursor8; +t.drop(); +t.save( {} ); +t.save( {} ); +t.save( {} ); + +assert.eq( 3 , t.find().count() , "A0" ); + +db.getMongo().getDB( "admin" ).runCommand( {closeAllDatabases:1} ); + +function test( want , msg ){ + var res = db.runCommand( { cursorInfo:1 } ); + assert.eq( want , res.clientCursors_size , msg + " " + tojson( res ) ); +} + +test( 0 , "A1" ); +assert.eq( 3 , t.find().count() , "A2" ); +assert.eq( 3 , t.find( {} ).count() , "A3" ); +assert.eq( 2, t.find( {} ).limit( 2 ).itcount() , "A4" ); +test( 1 , "B1" ); + diff -Nru mongodb-1.4.4/jstests/slowNightly/recstore.js mongodb-1.6.3/jstests/slowNightly/recstore.js --- mongodb-1.4.4/jstests/slowNightly/recstore.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/recstore.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,24 @@ +// recstore.js +// this is a simple test for new recstores (see reci.h) +// it is probably redundant with other tests but is a convenient starting point +// for testing such things. + +t = db.storetest; + +t.drop(); + +t.save({z:3}); +t.save({z:2}); + +t.ensureIndex({z:1}); +t.ensureIndex({q:1}); +assert( t.find().sort({z:1})[0].z == 2 ); + +t.dropIndexes(); + +assert( t.find().sort({z:1})[0].z == 2 ); + +t.ensureIndex({z:1}); +t.ensureIndex({q:1}); + +db.getSisterDB('admin').$cmd.findOne({closeAllDatabases:1}); diff -Nru mongodb-1.4.4/jstests/slowNightly/remove9.js mongodb-1.6.3/jstests/slowNightly/remove9.js --- mongodb-1.4.4/jstests/slowNightly/remove9.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/remove9.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,12 @@ +t = db.jstests_remove9; +t.drop(); + +js = "while( 1 ) { for( i = 0; i < 10000; ++i ) { db.jstests_remove9.save( {i:i} ); } db.jstests_remove9.remove( {i: {$gte:0} } ); }"; +pid = startMongoProgramNoConnect( "mongo" , "--eval" , js , db ? db.getMongo().host : null ); + +for( var i = 0; i < 10000; ++i ) { + t.remove( {i:Random.randInt( 10000 )} ); + assert.automsg( "!db.getLastError()" ); +} + +stopMongoProgramByPid( pid ); \ No newline at end of file diff -Nru mongodb-1.4.4/jstests/slowNightly/run_sharding_passthrough.js mongodb-1.6.3/jstests/slowNightly/run_sharding_passthrough.js --- mongodb-1.4.4/jstests/slowNightly/run_sharding_passthrough.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/run_sharding_passthrough.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,94 @@ +s = new ShardingTest( "auto1" , 2 , 1 , 1 ); +s.adminCommand( { enablesharding : "test" } ); +db=s.getDB("test"); + +var files = listFiles("jstests"); + +var runnerStart = new Date() + +files.forEach( + function(x) { + +// /(basic|update).*\.js$/ + if ( /[\/\\]_/.test(x.name) || + ! /\.js$/.test(x.name ) ){ + print(" >>>>>>>>>>>>>>> skipping " + x.name); + return; + } + + // Notes: + + // apply_ops1: nothing works, dunno why yet. SERVER-1439 + + // copydb, copydb2: copyDatabase seems not to work at all in + // the ShardingTest setup. SERVER-1440 + + // cursor8: cursorInfo different/meaningless(?) 
in mongos + // closeAllDatabases may not work through mongos + // SERVER-1441 + // deal with cursorInfo in mongos SERVER-1442 + + // dbcase: Database names are case-insensitive under ShardingTest? + // SERVER-1443 + + // These are all SERVER-1444 + // count5: limit() and maybe skip() may be unreliable + // geo3: limit() not working, I think + // or4: skip() not working? + + // shellkillop: dunno yet. SERVER-1445 + + // These should simply not be run under sharding: + // dbadmin: Uncertain Cut-n-pasting its contents into mongo worked. + // error1: getpreverror not supported under sharding + // fsync, fsync2: isn't supported through mongos + // remove5: getpreverror, I think. don't run + // update4: getpreverror don't run + + // Around July 20, command passthrough went away, and these + // commands weren't implemented: + // clean cloneCollectionAsCapped copydbgetnonce dataSize + // datasize dbstats deleteIndexes dropIndexes forceerror + // getnonce logout medianKey profile reIndex repairDatabase + // reseterror splitVector validate + + /* missing commands : + * forceerror and switchtoclienterrors + * cloneCollectionAsCapped + * splitvector + * profile (apitest_db, cursor6, evalb) + * copydbgetnonce + * dbhash + * medianKey + * clean (apitest_dbcollection) + * logout and getnonce + */ + if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile1|dbhash|median|apitest_dbcollection|evalb|auth1|auth2)\.js$/.test(x.name)) { + print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) + return; + } + // These are bugs (some might be fixed now): + if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4)\.js$/.test(x.name)) { + print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) + return; + } + // These aren't supposed to get run under sharding: + if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4)\.js$/.test(x.name)) { + print(" >>>>>>>>>>>>>>> skipping test that would fail under sharding " + x.name) + return; + } + + print(" *******************************************"); + print(" Test : " + x.name + " ..."); + print(" " + Date.timeFunc( + function() { + load(x.name); + }, 1) + "ms"); + + } +); + + +var runnerEnd = new Date() + +print( "total runner time: " + ( ( runnerEnd.getTime() - runnerStart.getTime() ) / 1000 ) + "secs" ) diff -Nru mongodb-1.4.4/jstests/slowNightly/sharding_balance1.js mongodb-1.6.3/jstests/slowNightly/sharding_balance1.js --- mongodb-1.4.4/jstests/slowNightly/sharding_balance1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/sharding_balance1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,55 @@ +// sharding_balance1.js + + +s = new ShardingTest( "slow_sharding_balance1" , 2 , 2 , 1 , { chunksize : 1 } ) + +s.adminCommand( { enablesharding : "test" } ); + +s.config.settings.find().forEach( printjson ) + +db = s.getDB( "test" ); + +bigString = "" +while ( bigString.length < 10000 ) + bigString += "asdasdasdasdadasdasdasdasdasdasdasdasda"; + +inserted = 0; +num = 0; +while ( inserted < ( 20 * 1024 * 1024 ) ){ + db.foo.insert( { _id : num++ , s : bigString } ); + inserted += bigString.length; +} + +db.getLastError(); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); +assert.lt( 20 , s.config.chunks.count() , "setup2" ); + +function diff(){ + var x = s.chunkCounts( "foo" ); + printjson( x ) + return Math.max( x.shard0000 , x.shard0001 ) - Math.min( x.shard0000 , x.shard0001 ); +} + 
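// An illustrative sketch (not used by this test): the same skew measurement
// for an arbitrary number of shards, assuming s.chunkCounts( "foo" ) returns
// an object keyed by shard name, e.g. { shard0000 : 12 , shard0001 : 9 }.
// sharding_rs1.js later in this patch uses essentially this form.
function chunkSpread(){
    var x = s.chunkCounts( "foo" );
    var min = null , max = null;
    for ( var sn in x ){
        if ( min == null || x[sn] < min ) min = x[sn];
        if ( max == null || x[sn] > max ) max = x[sn];
    }
    return max - min; // 0 means chunks are evenly spread
}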
+function sum(){ + var x = s.chunkCounts( "foo" ); + return x.shard0000 + x.shard0001; +} + +assert.lt( 20 , diff() , "big differential here" ); +print( diff() ) + +assert.soon( function(){ + var d = diff(); + return d < 5; +} , "balance didn't happen" , 1000 * 60 * 3 , 5000 ); + +var chunkCount = sum(); +s.adminCommand( { removeshard: "shard0000" } ); + +assert.soon( function(){ + printjson(s.chunkCounts( "foo" )); + s.config.shards.find().forEach(function(z){printjson(z);}); + return chunkCount == s.config.chunks.count({shard: "shard0001"}); +} , "removeshard didn't happen" , 1000 * 60 * 3 , 5000 ); + +s.stop(); diff -Nru mongodb-1.4.4/jstests/slowNightly/sharding_balance2.js mongodb-1.6.3/jstests/slowNightly/sharding_balance2.js --- mongodb-1.4.4/jstests/slowNightly/sharding_balance2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/sharding_balance2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,54 @@ +// sharding_balance2.js + +s = new ShardingTest( "slow_sharding_balance2" , 2 , 2 , 1 , { chunksize : 1 , manualAddShard : true } ) + +names = s.getConnNames(); +for ( var i=0; i .99 ){ + db.getLastError() + check(); // SERVER-1430 TODO + } + + var x = dist(); + if ( Math.random() > .999 ) + printjson( x ) + return Math.max( x.shard0000 , x.shard0001 ) - Math.min( x.shard0000 , x.shard0001 ); +} + +function sum(){ + var x = dist(); + return x.shard0000 + x.shard0001; +} + +assert.lt( 20 , diff() ,"initial load" ); +print( diff() ) + +assert.soon( function(){ + + var d = diff(); + return d < 5; +} , "balance didn't happen" , 1000 * 60 * 3 , 1 ); + + +s.stop(); diff -Nru mongodb-1.4.4/jstests/slowNightly/sharding_cursors1.js mongodb-1.6.3/jstests/slowNightly/sharding_cursors1.js --- mongodb-1.4.4/jstests/slowNightly/sharding_cursors1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/sharding_cursors1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,71 @@ +s = new ShardingTest( "cursors1" , 2 , 0 , 1 , { chunksize : 1 } ) + +s.adminCommand( { enablesharding : "test" } ); + +s.config.settings.find().forEach( printjson ) + +db = s.getDB( "test" ); + +bigString = "x" +while (bigString.length < 1024) + bigString += bigString; +assert.eq(bigString.length, 1024, 'len'); + +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); + +toInsert = ( 1 * 1000 * 1000 ); +for (var i=0; i < toInsert; i++ ){ + db.foo.insert( { i: i, r: Math.random(), s: bigString } ); + assert.eq(db.getLastError(), null, 'no error'); //SERVER-1541 +} + +inserted = toInsert; +for (var i=0; i < 10; i++ ){ + //assert.gte(db.foo.count(), toInsert, 'inserted enough'); //sometimes fails + assert.gte(db.foo.count(), toInsert - 100, 'inserted enough'); + inserted = Math.min(inserted, db.foo.count()) + sleep (100); +} + +print("\n\n\n **** inserted: " + inserted + '\n\n\n'); + +/* + +var line = 0; +try { + assert.gte(db.foo.find({}, {_id:1}).itcount(), inserted, 'itcount check - no sort - _id only'); + line = 1; + assert.gte(db.foo.find({}, {_id:1}).sort({_id:1}).itcount(), inserted, 'itcount check - _id sort - _id only'); + line = 2; + + db.foo.ensureIndex({i:1}); + db.foo.ensureIndex({r:1}); + db.getLastError(); + line = 3; + + assert.gte(db.foo.find({}, {i:1}).sort({i:1}).itcount(), inserted, 'itcount check - i sort - i only'); + line = 4; + assert.gte(db.foo.find({}, {_id:1}).sort({i:1}).itcount(), inserted, 'itcount check - i sort - _id only'); + line = 5; + + assert.gte(db.foo.find({}, {r:1}).sort({r:1}).itcount(), inserted, 'itcount check - r sort - r 
only'); + line = 6; + assert.gte(db.foo.find({}, {_id:1}).sort({r:1}).itcount(), inserted, 'itcount check - r sort - _id only'); + line = 7; + + assert.gte(db.foo.find().itcount(), inserted, 'itcount check - no sort - full'); + line = 8; + assert.gte(db.foo.find().sort({_id:1}).itcount(), inserted, 'itcount check - _id sort - full'); + line = 9; + assert.gte(db.foo.find().sort({i:1}).itcount(), inserted, 'itcount check - i sort - full'); + line = 10; + assert.gte(db.foo.find().sort({r:1}).itcount(), inserted, 'itcount check - r sort - full'); + line = 11; +} catch (e) { + print("***** finished through line " + line + " before exception"); + throw e; +} + +*/ + +s.stop(); diff -Nru mongodb-1.4.4/jstests/slowNightly/sharding_rs1.js mongodb-1.6.3/jstests/slowNightly/sharding_rs1.js --- mongodb-1.4.4/jstests/slowNightly/sharding_rs1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowNightly/sharding_rs1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,72 @@ +// tests sharding with replica sets + +s = new ShardingTest( "rs1" , 3 , 1 , 2 , { rs : true , chunksize : 1 } ) + +s.adminCommand( { enablesharding : "test" } ); + +s.config.settings.find().forEach( printjson ) + +db = s.getDB( "test" ); + +bigString = "" +while ( bigString.length < 10000 ) + bigString += "asdasdasdasdadasdasdasdasdasdasdasdasda"; + +inserted = 0; +num = 0; +while ( inserted < ( 20 * 1024 * 1024 ) ){ + db.foo.insert( { _id : num++ , s : bigString , x : Math.random() } ); + inserted += bigString.length; +} + +db.getLastError(); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); +assert.lt( 20 , s.config.chunks.count() , "setup2" ); + +function diff(){ + var x = s.chunkCounts( "foo" ); + var total = 0; + var min = 1000000000; + var max = 0; + for ( var sn in x ){ + total += x[sn]; + if ( x[sn] < min ) + min = x[sn]; + if ( x[sn] > max ) + max = x[sn]; + } + + print( tojson(x) + " total: " + total + " min: " + min + " max: " + max ) + return max - min; +} + +assert.lt( 20 , diff() , "big differential here" ); +print( diff() ) + +assert.soon( function(){ + var d = diff(); + return d < 5; +} , "balance didn't happen" , 1000 * 60 * 3 , 5000 ); + +s.config.settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true ); + +for ( i=0; i 2000 ) + break; + + N *= 2; +} + +// --- test 1 + +assert.eq( 0, db.currentOp().inprog.length , "setup broken" ); + +join = startParallelShell( "print( 0 == db.query_yield1.find( function(){ var x=this.n; for ( var i=0; i<500; i++ ){ x = x * 2; } return false; } ).itcount() ); " ) + +assert.soon( + function(){ + var x = db.currentOp().inprog; + return x.length > 0; + } , "never doing query" , 2000 , 1 +); + +print( "start query" ); + +num = 0; +start = new Date(); +while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ + var me = Date.timeFunc( function(){ t.insert( { x : 1 } ); db.getLastError(); } ) + var x = db.currentOp() + + if ( num++ == 0 ){ + assert.eq( 1 , x.inprog.length , "nothing in prog" ); + } + + assert.gt( 50 , me ); + + if ( x.inprog.length == 0 ) + break; + +} + +join(); + +var x = db.currentOp() +assert.eq( 0 , x.inprog.length , "weird 2" ); + diff -Nru mongodb-1.4.4/jstests/slowWeekly/query_yield2.js mongodb-1.6.3/jstests/slowWeekly/query_yield2.js --- mongodb-1.4.4/jstests/slowWeekly/query_yield2.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowWeekly/query_yield2.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,73 @@ + +t = db.query_yield2; +t.drop() + +N = 100; +i = 0; + +q = function(){ var 
x=this.n; for ( var i=0; i<50000; i++ ){ x = x * 2; } return false; } + +while ( true ){ + function fill(){ + for ( ; i 2000 ) + break; + + N *= 2; +} + +// --- test 1 + +assert.eq( 0, db.currentOp().inprog.length , "setup broken" ); + +join = startParallelShell( "print( 0 == db.query_yield2.find( function(){ var x=this.n; for ( var i=0; i<50000; i++ ){ x = x * 2; } return false; } ).itcount() ); " ) + +assert.soon( + function(){ + var x = db.currentOp().inprog; + return x.length > 0; + } , "never doing query" , 2000 , 1 +); + +print( "start query" ); + +num = 0; +start = new Date(); +while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ + var me = Date.timeFunc( function(){ t.insert( { x : 1 } ); db.getLastError(); } ) + var x = db.currentOp() + + if ( num++ == 0 ){ + assert.eq( 1 , x.inprog.length , "nothing in prog" ); + } + + assert.gt( 75 , me ); + + if ( x.inprog.length == 0 ) + break; + +} + +join(); + +var x = db.currentOp() +assert.eq( 0 , x.inprog.length , "weird 2" ); + diff -Nru mongodb-1.4.4/jstests/slowWeekly/update_yield1.js mongodb-1.6.3/jstests/slowWeekly/update_yield1.js --- mongodb-1.4.4/jstests/slowWeekly/update_yield1.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/slowWeekly/update_yield1.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,78 @@ + +t = db.update_yield1; +t.drop() + +N = 10000; +i = 0; + +while ( true ){ + function fill(){ + for ( ; i 2000 ) + break; + + N *= 2; +} + +// --- test 1 + +join = startParallelShell( "db.update_yield1.update( {} , { $inc : { n : 1 } } , false , true ); db.getLastError()" ); + +assert.soon( + function(){ + return db.currentOp().inprog.length > 0; + } , "never doing update" +); + +num = 0; +start = new Date(); +while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ + var me = Date.timeFunc( function(){ t.findOne(); } ); + + if ( num++ == 0 ){ + var x = db.currentOp() + assert.eq( 1 , x.inprog.length , "nothing in prog" ); + } + + assert.gt( 50 , me ); +} + +join(); + +var x = db.currentOp() +assert.eq( 0 , x.inprog.length , "weird 2" ); + +// --- test 2 + +join = startParallelShell( "db.update_yield1.update( { $atomic : true } , { $inc : { n : 1 } } , false , true ); db.getLastError()" ); + +assert.soon( + function(){ + return db.currentOp().inprog.length > 0; + } , "never doing update 2" +); + +t.findOne(); +var x = db.currentOp() +assert.eq( 0 , x.inprog.length , "should have been atomic" ); + +join(); diff -Nru mongodb-1.4.4/jstests/splitvector.js mongodb-1.6.3/jstests/splitvector.js --- mongodb-1.4.4/jstests/splitvector.js 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/jstests/splitvector.js 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,81 @@ +// ------------------------- +// SPLITVECTOR TEST UTILS +// ------------------------- + +// ------------------------- +// assertChunkSizes verifies that a given 'splitVec' divides the 'test.jstest_splitvector' +// collection in 'maxChunkSize' approximately-sized chunks. Its asserts fail otherwise. +// @param splitVec: an array with keys for field 'x' +// e.g. [ { x : 1927 }, { x : 3855 }, ... +// @param numDocs: domain of 'x' field +// e.g. 20000 +// @param maxChunkSize is in MBs. 
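// Illustrative usage (a sketch, not part of the test file): the split vector
// under test normally comes from the splitVector command, assumed here to
// return its split points in a 'splitKeys' array, e.g.
//   res = db.runCommand( { splitVector : "test.jstest_splitvector" ,
//                          keyPattern : { x : 1 } , maxChunkSize : 1 } );
//   assertChunkSizes( res.splitKeys , numDocs , 1 );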
+ +assertChunkSizes = function ( splitVec , numDocs , maxChunkSize ){ + splitVec = [{ x: -1 }].concat( splitVec ); + splitVec.push( { x: numDocs+1 } ); + for ( i=0; i [... remainder of jstests/splitvector.js and some intervening diff content lost in extraction ...] diff -Nru mongodb-1.4.4/msvc/lib/Debug/README mongodb-1.6.3/msvc/lib/Debug/README --- mongodb-1.4.4/msvc/lib/Debug/README 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/lib/Debug/README 1969-12-31 16:00:00.000000000 -0800 @@ -1 +0,0 @@ -This is a dummy file to prevent Git from ignoring this empty directory. \ No newline at end of file diff -Nru mongodb-1.4.4/msvc/lib/Release/README mongodb-1.6.3/msvc/lib/Release/README --- mongodb-1.4.4/msvc/lib/Release/README 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/lib/Release/README 1969-12-31 16:00:00.000000000 -0800 @@ -1 +0,0 @@ -This is a dummy file to prevent Git from ignoring this empty directory. \ No newline at end of file diff -Nru mongodb-1.4.4/msvc/mongo/mongo.vcproj mongodb-1.6.3/msvc/mongo/mongo.vcproj --- mongodb-1.4.4/msvc/mongo/mongo.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongo/mongo.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,312 +0,0 @@ [312 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongo_app.vsprops mongodb-1.6.3/msvc/mongo_app.vsprops --- mongodb-1.4.4/msvc/mongo_app.vsprops 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongo_app.vsprops 1969-12-31 16:00:00.000000000 -0800 @@ -1,22 +0,0 @@ [22 deleted lines of property-sheet XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongobridge/mongobridge.vcproj mongodb-1.6.3/msvc/mongobridge/mongobridge.vcproj --- mongodb-1.4.4/msvc/mongobridge/mongobridge.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongobridge/mongobridge.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongoclient/mongoclient.vcproj mongodb-1.6.3/msvc/mongoclient/mongoclient.vcproj --- mongodb-1.4.4/msvc/mongoclient/mongoclient.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongoclient/mongoclient.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,240 +0,0 @@ [240 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongo_common/mongo_common.vcproj mongodb-1.6.3/msvc/mongo_common/mongo_common.vcproj --- mongodb-1.4.4/msvc/mongo_common/mongo_common.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongo_common/mongo_common.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,940 +0,0 @@ [940 deleted lines of Visual Studio project XML; markup lost in extraction]
diff -Nru mongodb-1.4.4/msvc/mongod/mongod.vcproj mongodb-1.6.3/msvc/mongod/mongod.vcproj --- mongodb-1.4.4/msvc/mongod/mongod.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongod/mongod.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,232 +0,0 @@ [232 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongodump/mongodump.vcproj mongodb-1.6.3/msvc/mongodump/mongodump.vcproj --- mongodb-1.4.4/msvc/mongodump/mongodump.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongodump/mongodump.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongoexport/mongoexport.vcproj mongodb-1.6.3/msvc/mongoexport/mongoexport.vcproj --- mongodb-1.4.4/msvc/mongoexport/mongoexport.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongoexport/mongoexport.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongofiles/mongofiles.vcproj mongodb-1.6.3/msvc/mongofiles/mongofiles.vcproj --- mongodb-1.4.4/msvc/mongofiles/mongofiles.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongofiles/mongofiles.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongoimportjson/mongoimportjson.vcproj mongodb-1.6.3/msvc/mongoimportjson/mongoimportjson.vcproj --- mongodb-1.4.4/msvc/mongoimportjson/mongoimportjson.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongoimportjson/mongoimportjson.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongo_lib.vsprops mongodb-1.6.3/msvc/mongo_lib.vsprops --- mongodb-1.4.4/msvc/mongo_lib.vsprops 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongo_lib.vsprops 1969-12-31 16:00:00.000000000 -0800 @@ -1,22 +0,0 @@ [22 deleted lines of property-sheet XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongorestore/mongorestore.vcproj mongodb-1.6.3/msvc/mongorestore/mongorestore.vcproj --- mongodb-1.4.4/msvc/mongorestore/mongorestore.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongorestore/mongorestore.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,296 +0,0 @@ [296 deleted lines of Visual Studio project XML; markup lost in extraction]
diff -Nru mongodb-1.4.4/msvc/mongos/mongos.vcproj mongodb-1.6.3/msvc/mongos/mongos.vcproj --- mongodb-1.4.4/msvc/mongos/mongos.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongos/mongos.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,228 +0,0 @@ [228 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/mongo.sln mongodb-1.6.3/msvc/mongo.sln --- mongodb-1.4.4/msvc/mongo.sln 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/mongo.sln 1969-12-31 16:00:00.000000000 -0800 @@ -1,138 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongo_common", "mongo_common\mongo_common.vcproj", "{69E92318-D8DA-434E-B3D6-F172E88ADC95}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongod", "mongod\mongod.vcproj", "{0811084D-41C1-49E6-998B-3B1230227FF0}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - {8DA99072-BDC8-4204-8DE2-67B5A5466D40} = {8DA99072-BDC8-4204-8DE2-67B5A5466D40} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongo", "mongo\mongo.vcproj", "{34348125-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core_server", "core_server\core_server.vcproj", "{8DA99072-BDC8-4204-8DE2-67B5A5466D40}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "server_only", "server_only\server_only.vcproj", "{3FEF2C0D-6B49-469F-94C4-0673291D58CE}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "shard_server", "shard_server\shard_server.vcproj", "{9D2CD1F3-973E-49E6-A0E2-95C834562263}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongos", "mongos\mongos.vcproj", "{942113AE-678B-4C7B-BC78-D91AB9C52390}" - ProjectSection(ProjectDependencies) = postProject - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - {8DA99072-BDC8-4204-8DE2-67B5A5466D40} = {8DA99072-BDC8-4204-8DE2-67B5A5466D40} - {9D2CD1F3-973E-49E6-A0E2-95C834562263} = {9D2CD1F3-973E-49E6-A0E2-95C834562263} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongodump", "mongodump\mongodump.vcproj", "{44348125-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongorestore", "mongorestore\mongorestore.vcproj", "{45348125-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongoexport", "mongoexport\mongoexport.vcproj", "{45448125-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject
- {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongoimportjson", "mongoimportjson\mongoimportjson.vcproj", "{45459135-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongofiles", "mongofiles\mongofiles.vcproj", "{45459125-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongobridge", "mongobridge\mongobridge.vcproj", "{45458225-2F31-459F-AA95-A1525903AE2B}" - ProjectSection(ProjectDependencies) = postProject - {3FEF2C0D-6B49-469F-94C4-0673291D58CE} = {3FEF2C0D-6B49-469F-94C4-0673291D58CE} - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongoclient", "mongoclient\mongoclient.vcproj", "{36AAAE5C-4750-4713-9240-A43BE30BA8D3}" - ProjectSection(ProjectDependencies) = postProject - {69E92318-D8DA-434E-B3D6-F172E88ADC95} = {69E92318-D8DA-434E-B3D6-F172E88ADC95} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {69E92318-D8DA-434E-B3D6-F172E88ADC95}.Debug|Win32.ActiveCfg = Debug|Win32 - {69E92318-D8DA-434E-B3D6-F172E88ADC95}.Debug|Win32.Build.0 = Debug|Win32 - {69E92318-D8DA-434E-B3D6-F172E88ADC95}.Release|Win32.ActiveCfg = Release|Win32 - {69E92318-D8DA-434E-B3D6-F172E88ADC95}.Release|Win32.Build.0 = Release|Win32 - {0811084D-41C1-49E6-998B-3B1230227FF0}.Debug|Win32.ActiveCfg = Debug|Win32 - {0811084D-41C1-49E6-998B-3B1230227FF0}.Debug|Win32.Build.0 = Debug|Win32 - {0811084D-41C1-49E6-998B-3B1230227FF0}.Release|Win32.ActiveCfg = Release|Win32 - {0811084D-41C1-49E6-998B-3B1230227FF0}.Release|Win32.Build.0 = Release|Win32 - {34348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {34348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {34348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {34348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {8DA99072-BDC8-4204-8DE2-67B5A5466D40}.Debug|Win32.ActiveCfg = Debug|Win32 - {8DA99072-BDC8-4204-8DE2-67B5A5466D40}.Debug|Win32.Build.0 = Debug|Win32 - {8DA99072-BDC8-4204-8DE2-67B5A5466D40}.Release|Win32.ActiveCfg = Release|Win32 - {8DA99072-BDC8-4204-8DE2-67B5A5466D40}.Release|Win32.Build.0 = Release|Win32 - {3FEF2C0D-6B49-469F-94C4-0673291D58CE}.Debug|Win32.ActiveCfg = Debug|Win32 - {3FEF2C0D-6B49-469F-94C4-0673291D58CE}.Debug|Win32.Build.0 = Debug|Win32 - {3FEF2C0D-6B49-469F-94C4-0673291D58CE}.Release|Win32.ActiveCfg = Release|Win32 - {3FEF2C0D-6B49-469F-94C4-0673291D58CE}.Release|Win32.Build.0 = Release|Win32 - {9D2CD1F3-973E-49E6-A0E2-95C834562263}.Debug|Win32.ActiveCfg = Debug|Win32 - 
{9D2CD1F3-973E-49E6-A0E2-95C834562263}.Debug|Win32.Build.0 = Debug|Win32 - {9D2CD1F3-973E-49E6-A0E2-95C834562263}.Release|Win32.ActiveCfg = Release|Win32 - {9D2CD1F3-973E-49E6-A0E2-95C834562263}.Release|Win32.Build.0 = Release|Win32 - {942113AE-678B-4C7B-BC78-D91AB9C52390}.Debug|Win32.ActiveCfg = Debug|Win32 - {942113AE-678B-4C7B-BC78-D91AB9C52390}.Debug|Win32.Build.0 = Debug|Win32 - {942113AE-678B-4C7B-BC78-D91AB9C52390}.Release|Win32.ActiveCfg = Release|Win32 - {942113AE-678B-4C7B-BC78-D91AB9C52390}.Release|Win32.Build.0 = Release|Win32 - {44348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {44348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {44348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {44348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {45348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {45348125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {45348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {45348125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {45448125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {45448125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {45448125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {45448125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {45459135-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {45459135-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {45459135-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {45459135-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {45459125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {45459125-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {45459125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {45459125-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {45458225-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.ActiveCfg = Debug|Win32 - {45458225-2F31-459F-AA95-A1525903AE2B}.Debug|Win32.Build.0 = Debug|Win32 - {45458225-2F31-459F-AA95-A1525903AE2B}.Release|Win32.ActiveCfg = Release|Win32 - {45458225-2F31-459F-AA95-A1525903AE2B}.Release|Win32.Build.0 = Release|Win32 - {36AAAE5C-4750-4713-9240-A43BE30BA8D3}.Debug|Win32.ActiveCfg = Debug|Win32 - {36AAAE5C-4750-4713-9240-A43BE30BA8D3}.Debug|Win32.Build.0 = Debug|Win32 - {36AAAE5C-4750-4713-9240-A43BE30BA8D3}.Release|Win32.ActiveCfg = Release|Win32 - {36AAAE5C-4750-4713-9240-A43BE30BA8D3}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff -Nru mongodb-1.4.4/msvc/msvc_scripting.cpp mongodb-1.6.3/msvc/msvc_scripting.cpp --- mongodb-1.4.4/msvc/msvc_scripting.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/msvc_scripting.cpp 1969-12-31 16:00:00.000000000 -0800 @@ -1,26 +0,0 @@ -/* - * Copyright 2010 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "stdafx.h" - -#if defined(USESM) -#include "..\scripting\engine_spidermonkey.cpp" -#elif defined(NOJNI) -#include "..\scripting\engine_java.cpp" -#else -#include "..\scripting\engine_none.cpp" -#endif \ No newline at end of file diff -Nru mongodb-1.4.4/msvc/README mongodb-1.6.3/msvc/README --- mongodb-1.4.4/msvc/README 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/README 1969-12-31 16:00:00.000000000 -0800 @@ -1,54 +0,0 @@ - -Instructions for compiling MongoDB in Visual Studio 2008 -======================================================== - -Visual Studio Solution: ----------------------- - -mongo.sln -> MongoDB solution that contains all projects necessary for building applications and libraries. - - - -Static Library Projects: ------------------------- - -mongo_common -> common MongoDB files -core_server -> core server files -server_only -> files for building server-only applications -shard_server -> shard server files - - -Console Application Projects: ------------------------------ - -mongod -> MongoDB server (links mongo_common and server_only) -mongo -> MongoDB shell (links mongo_common) -mongobridge -> MongoDB bridge server shell (links mongo_common and server_only) -mongodump -> MongoDB dump application (links mongo_common and server_only) -mongoexport -> MongoDB export application (links mongo_common and server_only) -mongofiles -> MongoDB files application (links mongo_common and server_only) -mongoimportjson -> MongoDB import json application (links mongo_common and server_only) -mongorestore -> MongoDB restore application (links mongo_common and server_only) -mongos -> MongoDB shard server (links mongo_common, core_server and shard_server) - - -Client Driver Library: ------------------------------ - -mongoclient -> static library containing client driver files - - - -Notes: -====== - -1) All static libraries derive project settings from Project Property Sheet "mongo_lib" -(View->Other Windows->Property Manager). Settings configured in this Property Sheet will -be inherited by all static library projects (Include Directories, Library Directories, etc). - -2) All console applications derive project settings from "mongo_app". - -3) msvc_scripting.cpp is used to control the javascript library to use - to change, simply -modify the "Preprocessor" project setting in the Property Sheets to reflect the required -javascript option (USESM or NOJNI).
- diff -Nru mongodb-1.4.4/msvc/server_only/server_only.vcproj mongodb-1.6.3/msvc/server_only/server_only.vcproj --- mongodb-1.4.4/msvc/server_only/server_only.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/server_only/server_only.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,362 +0,0 @@ [362 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/msvc/shard_server/shard_server.vcproj mongodb-1.6.3/msvc/shard_server/shard_server.vcproj --- mongodb-1.4.4/msvc/shard_server/shard_server.vcproj 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/msvc/shard_server/shard_server.vcproj 1969-12-31 16:00:00.000000000 -0800 @@ -1,262 +0,0 @@ [262 deleted lines of Visual Studio project XML; markup lost in extraction] diff -Nru mongodb-1.4.4/pch.cpp mongodb-1.6.3/pch.cpp --- mongodb-1.4.4/pch.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/pch.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,30 @@ +// pch.cpp : helper for using precompiled headers + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" + +#if defined( __MSVC__ ) +// should probably check VS version here +#elif defined( __GNUC__ ) + +#if __GNUC__ < 4 +#error gcc < 4 not supported +#endif + +#else +// unknown compiler +#endif diff -Nru mongodb-1.4.4/pch.h mongodb-1.6.3/pch.h --- mongodb-1.4.4/pch.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/pch.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,170 @@ +// pch.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MONGO_PCH_H +#define MONGO_PCH_H + +#if defined(MONGO_EXPOSE_MACROS) +# define JS_C_STRINGS_ARE_UTF8 +# undef SUPPORT_UCP +# define SUPPORT_UCP +# undef SUPPORT_UTF8 +# define SUPPORT_UTF8 +# undef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +#endif + +#if defined(WIN32) + +#ifndef _WIN32 +#define _WIN32 +#endif + +#endif + +#if defined(_WIN32) +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include <winsock2.h> //this must be included before the first windows.h include [header name recovered from this comment; the next three Windows system #include targets were lost in extraction] +#endif + [fourteen C/C++ standard library #include directives; header names lost in extraction] +#include "targetver.h" +#include "time.h" +#include "string.h" +#include "limits.h" + [nine #include directives; header names lost in extraction] +#include "boost/bind.hpp" +#include "boost/function.hpp" +#include [header name lost in extraction] +#include "boost/detail/endian.hpp" +#define BOOST_SPIRIT_THREADSAFE + [six #include directives; header names lost in extraction] +#undef assert +#define assert MONGO_assert + +namespace mongo { + + using namespace std; + using boost::shared_ptr; + +#if defined(_DEBUG) + const bool debug=true; +#else + const bool debug=false; +#endif + + // pdfile versions + const int VERSION = 4; + const int VERSION_MINOR = 5; + + enum ExitCode { + EXIT_CLEAN = 0 , + EXIT_BADOPTIONS = 2 , + EXIT_REPLICATION_ERROR = 3 , + EXIT_NEED_UPGRADE = 4 , + EXIT_KILL = 12 , + EXIT_ABRUBT = 14 , + EXIT_NTSERVICE_ERROR = 20 , + EXIT_JAVA = 21 , + EXIT_OOM_MALLOC = 42 , + EXIT_OOM_REALLOC = 43 , + EXIT_FS = 45 , + EXIT_CLOCK_SKEW = 47 , + EXIT_NET_ERROR = 48 , + EXIT_POSSIBLE_CORRUPTION = 60 , // this means we detected a possible corruption situation, like a buf overflow + EXIT_UNCAUGHT = 100 , // top level exception that wasn't caught + EXIT_TEST = 101 , + + }; + + void dbexit( ExitCode returnCode, const char *whyMsg = ""); + + /** + this is here so you can't just type exit() to quit the program + you should either use dbexit to shutdown cleanly, or ::exit to tell the system to quit + if you use this, you'll get a link error since mongo::exit isn't defined + */ + void exit( ExitCode returnCode ); + bool inShutdown(); + +} // namespace mongo + +namespace mongo { + using namespace boost::filesystem; + void asserted(const char *msg, const char *file, unsigned line); +} + +#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) + +#include "util/debug_util.h" +#include "util/goodies.h" +#include "util/log.h" +#include "util/allocator.h" +#include "util/assert_util.h" + +namespace mongo { + + void sayDbContext(const char *msg = 0); + void rawOut( const string &s ); + +} // namespace mongo + +namespace mongo { + + typedef char _TCHAR; + + using boost::uint32_t; + using boost::uint64_t; + +} // namespace mongo + +#endif // MONGO_PCH_H diff -Nru mongodb-1.4.4/pcre-7.4/config.h mongodb-1.6.3/pcre-7.4/config.h --- mongodb-1.4.4/pcre-7.4/config.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/pcre-7.4/config.h 2010-09-24 10:02:42.000000000 -0700 @@ -212,7 +212,9 @@ /* #undef SUPPORT_UCP */ /* Define to enable support for the UTF-8 Unicode encoding.
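   (The guard added below is presumably needed because pch.h may already define SUPPORT_UTF8 when MONGO_EXPOSE_MACROS is set, as in the pch.h hunk above, so config.h avoids a duplicate definition.)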
*/ +#if( !defined(SUPPORT_UTF8) ) #define SUPPORT_UTF8 +#endif /* Version number of package */ #define VERSION "7.4" diff -Nru mongodb-1.4.4/README mongodb-1.6.3/README --- mongodb-1.4.4/README 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/README 2010-09-24 10:02:42.000000000 -0700 @@ -1,43 +1,51 @@ -MongoDB README - -DOCUMENTATION - - http://www.mongodb.org/ - -COMPONENTS - - mongod - The database process. - mongos - Sharding controller. - mongo - The database shell (uses interactive javascript). - -BUILDING - - see docs/building.md - - -RUNNING - - ./mongod - - runs the database. Use - - ./mongod --help - - to see command line options. - -NOTES - - Mongo uses memory mapped files. If built as a 32 bit executable, you will - not be able to work with large (multi-gigabyte) databases. However, 32 bit - builds work fine with small development databases. - - -LICENSING - - Most MongoDB source files are made available under the terms of the - GNU Affero General Public License (AGPL). See individual files for - details. - - As an exception, the files in the debian/ directory, the rpm/ - directory, and all subdirectories thereof are made available under - the terms of the Apache License, version 2.0. +MongoDB README + +DOCUMENTATION + + http://www.mongodb.org/ + +COMPONENTS + + mongod - The database process. + mongos - Sharding controller. + mongo - The database shell (uses interactive javascript). + +BUILDING + + See docs/building.md, also www.mongodb.org search for "Building". + +RUNNING + + For command line options invoke: + + $ ./mongod --help + + To run a single server database: + + $ mkdir /data/db + $ ./mongod + $ + $ # The mongo javascript shell connects to localhost and test database by default: + $ ./mongo + > help + +DRIVERS + + Client drivers for most programming languages are available at mongodb.org. + +NOTES + + Mongo uses memory mapped files. If built as a 32 bit executable, you will + not be able to work with large (multi-gigabyte) databases. However, 32 bit + builds work fine with small development databases. + +LICENSE + + Most MongoDB source files are made available under the terms of the + GNU Affero General Public License (AGPL). See individual files for + details. + + As an exception, the files in the client/, debian/, rpm/, + utils/mongoutils, and all subdirectories thereof are made available under + the terms of the Apache License, version 2.0. + diff -Nru mongodb-1.4.4/rpm/init.d-mongod mongodb-1.6.3/rpm/init.d-mongod --- mongodb-1.4.4/rpm/init.d-mongod 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/rpm/init.d-mongod 2010-09-24 10:02:42.000000000 -0700 @@ -29,7 +29,8 @@ echo -n $"Starting mongod: " daemon --user "$MONGO_USER" $mongod $OPTIONS RETVAL=$? - [ $RETVAL -eq 0 ] && touch /var/lock/subsys/mongod && success + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/mongod } stop() @@ -37,7 +38,8 @@ echo -n $"Stopping mongod: " killproc -p /var/lib/mongo/mongod.lock -t30 -TERM /usr/bin/mongod RETVAL=$? 
- [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod && success + echo + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod } restart () { diff -Nru mongodb-1.4.4/rpm/mongod.conf mongodb-1.6.3/rpm/mongod.conf --- mongodb-1.4.4/rpm/mongod.conf 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/rpm/mongod.conf 2010-09-24 10:02:42.000000000 -0700 @@ -3,6 +3,8 @@ #where to log logpath=/var/log/mongo/mongod.log +logappend=true + # fork and run in background fork = true diff -Nru mongodb-1.4.4/rpm/mongo.spec mongodb-1.6.3/rpm/mongo.spec --- mongodb-1.4.4/rpm/mongo.spec 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/rpm/mongo.spec 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,5 @@ Name: mongo -Version: 1.4.4 +Version: 1.6.3 Release: mongodb_1%{?dist} Summary: mongo client shell and tools License: AGPL 3.0 @@ -91,7 +91,7 @@ %postun server if test $1 -ge 1 then - /sbin/service mongod stop >/dev/null 2>&1 || : + /sbin/service mongod condrestart >/dev/null 2>&1 || : fi %files @@ -130,11 +130,6 @@ %attr(0755,mongod,mongod) %dir /var/log/mongo %attr(0640,mongod,mongod) %config(noreplace) %verify(not md5 size mtime) /var/log/mongo/mongod.log -%files devel -/usr/include/mongo -%{_libdir}/libmongoclient.a -#%{_libdir}/libmongotestfiles.a - %changelog * Thu Jan 28 2010 Richard M Kreuter - Minor fixes. diff -Nru mongodb-1.4.4/s/balance.cpp mongodb-1.6.3/s/balance.cpp --- mongodb-1.4.4/s/balance.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/balance.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,295 @@ +// balance.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include "pch.h" + +#include "../db/jsobj.h" +#include "../db/cmdline.h" + +#include "../client/distlock.h" + +#include "balance.h" +#include "server.h" +#include "shard.h" +#include "config.h" +#include "chunk.h" +#include "grid.h" + +namespace mongo { + + Balancer balancer; + + Balancer::Balancer() : _balancedLastTime(0), _policy( new BalancerPolicy ){} + + Balancer::~Balancer() { + delete _policy; + } + + int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) { + int movedCount = 0; + + for ( vector<CandidateChunkPtr>::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ){ + const CandidateChunk& chunkInfo = *it->get(); + + DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns ); + assert( cfg ); + + ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns ); + assert( cm ); + + const BSONObj& chunkToMove = chunkInfo.chunk; + ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() ); + if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){ + // likely a split happened somewhere + cm = cfg->getChunkManager( chunkInfo.ns , true ); + assert( cm ); + + c = cm->findChunk( chunkToMove["min"].Obj() ); + if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) ){ + log() << "chunk mismatch after reload, ignoring will retry issue cm: " + << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl; + continue; + } + } + + string errmsg; + if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , errmsg ) ){ + movedCount++; + continue; + } + + log() << "MOVE FAILED **** " << errmsg << "\n" + << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkToMove << endl; + } + + return movedCount; + } + + void Balancer::_ping(){ + assert( _myid.size() && _started ); + try { + ScopedDbConnection conn( configServer.getPrimary() ); + _ping( conn.conn() ); + conn.done(); + } + catch ( std::exception& e ){ + log() << "bare ping failed: " << e.what() << endl; + } + + } + + void Balancer::_ping( DBClientBase& conn ){ + WriteConcern w = conn.getWriteConcern(); + conn.setWriteConcern( W_NONE ); + + conn.update( ShardNS::mongos , + BSON( "_id" << _myid ) , + BSON( "$set" << BSON( "ping" << DATENOW << "up" << (int)(time(0)-_started) ) ) , + true ); + + conn.setWriteConcern( w); + } + + bool Balancer::_checkOIDs(){ + vector<Shard> all; + Shard::getAllShards( all ); + + map<int,Shard> oids; + + for ( vector<Shard>::iterator i=all.begin(); i!=all.end(); ++i ){ + Shard s = *i; + BSONObj f = s.runCommand( "admin" , "features" ); + if ( f["oidMachine"].isNumber() ){ + int x = f["oidMachine"].numberInt(); + if ( oids.count(x) == 0 ){ + oids[x] = s; + } + else { + log() << "error: 2 machines have " << x << " as oid machine piece " << s.toString() << " and " << oids[x].toString() << endl; + s.runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) ); + oids[x].runCommand( "admin" , BSON( "features" << 1 << "oidReset" << 1 ) ); + return false; + } + } + else { + log() << "warning: oidMachine not set on: " << s.toString() << endl; + } + } + return true; + } + + void Balancer::_doBalanceRound( DBClientBase& conn, vector<CandidateChunkPtr>* candidateChunks ){ + assert( candidateChunks ); + + // + // 1. Check whether there is any sharded collection to be balanced by querying + // the ShardsNS::collections collection + // + + auto_ptr<DBClientCursor> cursor = conn.query( ShardNS::collection , BSONObj() ); + vector< string > collections; + while ( cursor->more() ){ + BSONObj col = cursor->next(); + + // sharded collections will have a shard "key". if ( !
col["key"].eoo() ) + collections.push_back( col["_id"].String() ); + } + cursor.reset(); + + if ( collections.empty() ) { + log(1) << "no collections to balance" << endl; + return; + } + + // + // 2. Get a list of all the shards that are participating in this balance round + // along with any maximum allowed quotas and current utilization. We get the + // latter by issuing db.serverStatus() (mem.mapped) to all shards. + // + // TODO: skip unresponsive shards and mark information as stale. + // + + vector allShards; + Shard::getAllShards( allShards ); + if ( allShards.size() < 2) { + log(1) << "can't balance without more active shards" << endl; + return; + } + + map< string, BSONObj > shardLimitsMap; + for ( vector::const_iterator it = allShards.begin(); it != allShards.end(); ++it ){ + const Shard& s = *it; + ShardStatus status = s.getStatus(); + + BSONObj limitsObj = BSON( ShardFields::maxSize( s.getMaxSize() ) << + ShardFields::currSize( status.mapped() ) << + ShardFields::draining( s.isDraining()) ); + + shardLimitsMap[ s.getName() ] = limitsObj; + } + + // + // 3. For each collection, check if the balancing policy recommends moving anything around. + // + + for (vector::const_iterator it = collections.begin(); it != collections.end(); ++it ) { + const string& ns = *it; + + map< string,vector > shardToChunksMap; + cursor = conn.query( ShardNS::chunk , QUERY( "ns" << ns ).sort( "min" ) ); + while ( cursor->more() ){ + BSONObj chunk = cursor->next(); + vector& chunks = shardToChunksMap[chunk["shard"].String()]; + chunks.push_back( chunk.getOwned() ); + } + cursor.reset(); + + if (shardToChunksMap.empty()) { + log(1) << "skipping empty collection (" << ns << ")"; + continue; + } + + for ( vector::iterator i=allShards.begin(); i!=allShards.end(); ++i ){ + // this just makes sure there is an entry in shardToChunksMap for every shard + Shard s = *i; + shardToChunksMap[s.getName()].size(); + } + + CandidateChunk* p = _policy->balance( ns , shardLimitsMap , shardToChunksMap , _balancedLastTime ); + if ( p ) candidateChunks->push_back( CandidateChunkPtr( p ) ); + } + } + + void Balancer::run(){ + + { // init stuff, don't want to do at static init + StringBuilder buf; + buf << getHostNameCached() << ":" << cmdLine.port; + _myid = buf.str(); + log(1) << "balancer myid: " << _myid << endl; + + _started = time(0); + + Shard::reloadShardInfo(); + } + + _ping(); + _checkOIDs(); + + ConnectionString config = configServer.getConnectionString(); + DistributedLock balanceLock( config , "balancer" ); + + while ( ! inShutdown() ){ + + try { + ScopedDbConnection conn( config ); + + _ping( conn.conn() ); + if ( ! _checkOIDs() ){ + uassert( 13258 , "oids broken after resetting!" , _checkOIDs() ); + } + + // use fresh shard state + Shard::reloadShardInfo(); + + dist_lock_try lk( &balanceLock , "doing balance round" ); + if ( ! lk.got() ){ + log(1) << "skipping balancing round during ongoing split or move activity." << endl; + conn.done(); + + sleepsecs( 30 ); // no need to wake up soon + continue; + } + + if ( ! 
grid.shouldBalance() ) { + log(1) << "skipping balancing round because balancing is disabled" << endl;; + conn.done(); + + sleepsecs( 30 ); + continue; + } + + log(1) << "*** start balancing round" << endl; + + vector candidateChunks; + _doBalanceRound( conn.conn() , &candidateChunks ); + if ( candidateChunks.size() == 0 ) { + log(1) << "no need to move any chunk" << endl; + } else { + _balancedLastTime = _moveChunks( &candidateChunks ); + } + + log(1) << "*** end of balancing round" << endl; + conn.done(); + + sleepsecs( _balancedLastTime ? 5 : 10 ); + } + catch ( std::exception& e ){ + log() << "caught exception while doing balance: " << e.what() << endl; + + // Just to match the opening statement if in log level 1 + log(1) << "*** End of balancing round" << endl; + + sleepsecs( 30 ); // sleep a fair amount b/c of error + continue; + } + } + } + +} // namespace mongo diff -Nru mongodb-1.4.4/s/balance.h mongodb-1.6.3/s/balance.h --- mongodb-1.4.4/s/balance.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/balance.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,80 @@ +// balance.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../pch.h" +#include "../util/background.h" +#include "../client/dbclient.h" +#include "balancer_policy.h" + +namespace mongo { + + class Balancer : public BackgroundJob { + public: + Balancer(); + virtual ~Balancer(); + + // BackgroundJob methods + + virtual void run(); + + virtual string name() { return "Balancer"; } + + private: + typedef BalancerPolicy::ChunkInfo CandidateChunk; + typedef shared_ptr CandidateChunkPtr; + + /** + * Gathers all the necessary information about shards and chunks, and + * decides whether there are candidate chunks to be moved. + */ + void _doBalanceRound( DBClientBase& conn, vector* candidateChunks ); + + /** + * Execute the chunk migrations described in 'candidateChunks' and + * returns the number of chunks effectively moved. + */ + int _moveChunks( const vector* candidateChunks ); + + /** + * Check the health of the master configuration server + */ + void _ping(); + void _ping( DBClientBase& conn ); + + /** + * @return true if everything is ok + */ + bool _checkOIDs(); + + // internal state + + string _myid; // hostname:port of my mongos + time_t _started; // time Balancer starte running + int _balancedLastTime; // number of moved chunks in last round + BalancerPolicy* _policy; // decide which chunks to move; owned here. + + // non-copyable, non-assignable + + Balancer(const Balancer&); + Balancer operator=(const Balancer&); + }; + + extern Balancer balancer; +} diff -Nru mongodb-1.4.4/s/balancer_policy.cpp mongodb-1.6.3/s/balancer_policy.cpp --- mongodb-1.4.4/s/balancer_policy.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/balancer_policy.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,296 @@ +// balancer_policy.cpp + +/** +* Copyright (C) 2010 10gen Inc. 
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+
+#include "config.h"
+
+#include "../client/dbclient.h"
+#include "../util/stringutils.h"
+#include "../util/unittest.h"
+
+#include "balancer_policy.h"
+
+namespace mongo {
+
+    BalancerPolicy::ChunkInfo* BalancerPolicy::balance( const string& ns,
+                                                        const ShardToLimitsMap& shardToLimitsMap,
+                                                        const ShardToChunksMap& shardToChunksMap,
+                                                        int balancedLastTime ){
+        pair<string,unsigned> min("",numeric_limits<unsigned>::max());
+        pair<string,unsigned> max("",0);
+        vector<string> drainingShards;
+
+        for (ShardToChunksIter i = shardToChunksMap.begin(); i!=shardToChunksMap.end(); ++i ){
+
+            // Find whether this shard has reached its size cap or whether it is being removed.
+            const string& shard = i->first;
+            BSONObj shardLimits;
+            ShardToLimitsIter it = shardToLimitsMap.find( shard );
+            if ( it != shardToLimitsMap.end() ) shardLimits = it->second;
+            const bool maxedOut = isSizeMaxed( shardLimits );
+            const bool draining = isDraining( shardLimits );
+
+            // Check whether this shard is a better chunk receiver than the current one.
+            // Maxed out shards or draining shards cannot be considered receivers.
+            const unsigned size = i->second.size();
+            if ( ! maxedOut && ! draining ){
+                if ( size < min.second ){
+                    min = make_pair( shard , size );
+                }
+            }
+
+            // Check whether this shard is a better chunk donor than the current one.
+            // Draining shards take a lower priority than overloaded shards.
+            if ( size > max.second ){
+                max = make_pair( shard , size );
+            }
+            if ( draining && (size > 0)){
+                drainingShards.push_back( shard );
+            }
+        }
+
+        // If there is no candidate chunk receiver -- they may have all been maxed out,
+        // draining, ... -- there's not much that the policy can do.
+        if ( min.second == numeric_limits<unsigned>::max() ){
+            log() << "no available shards to take chunks" << endl;
+            return NULL;
+        }
+
+        log(1) << "collection : " << ns << endl;
+        log(1) << "donor : " << max.second << " chunks on " << max.first << endl;
+        log(1) << "receiver : " << min.second << " chunks on " << min.first << endl;
+        if ( ! drainingShards.empty() ){
+            string drainingStr;
+            joinStringDelim( drainingShards, &drainingStr, ',' );
+            log(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl;
+        }
+
+        // Solving imbalances takes a higher priority than draining shards. Many shards can
+        // be draining at once but we choose only one of them to cater to per round.
+        const int imbalance = max.second - min.second;
+        const int threshold = balancedLastTime ? 2 : 8;
+        string from, to;
+        if ( imbalance >= threshold ){
+            from = max.first;
+            to = min.first;
+
+        } else if ( ! drainingShards.empty() ){
+            from = drainingShards[ rand() % drainingShards.size() ];
+            to = min.first;
+
+        } else {
+            // Everything is balanced here!
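+            // For illustration: chunk counts of { shard0: 9, shard1: 7 } give an
+            // imbalance of 2, which triggers a move only when the previous round
+            // also moved a chunk (threshold 2); a quiet cluster lands here until
+            // the spread reaches 8.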
+ return NULL; + } + + const vector& chunksFrom = shardToChunksMap.find( from )->second; + const vector& chunksTo = shardToChunksMap.find( to )->second; + BSONObj chunkToMove = pickChunk( chunksFrom , chunksTo ); + log() << "chose [" << from << "] to [" << to << "] " << chunkToMove << endl; + + return new ChunkInfo( ns, to, from, chunkToMove ); + } + + BSONObj BalancerPolicy::pickChunk( const vector& from, const vector& to ){ + // It is possible for a donor ('from') shard to have less chunks than a recevier one ('to') + // if the donor is in draining mode. + + if ( to.size() == 0 ) + return from[0]; + + if ( from[0]["min"].Obj().woCompare( to[to.size()-1]["max"].Obj() , BSONObj() , false ) == 0 ) + return from[0]; + + if ( from[from.size()-1]["max"].Obj().woCompare( to[0]["min"].Obj() , BSONObj() , false ) == 0 ) + return from[from.size()-1]; + + return from[0]; + } + + bool BalancerPolicy::isSizeMaxed( BSONObj limits ){ + // If there's no limit information for the shard, assume it can be a chunk receiver + // (i.e., there's not bound on space utilization) + if ( limits.isEmpty() ){ + return false; + } + + long long maxUsage = limits[ ShardFields::maxSize.name() ].Long(); + if ( maxUsage == 0 ){ + return false; + } + + long long currUsage = limits[ ShardFields::currSize.name() ].Long(); + if ( currUsage < maxUsage ){ + return false; + } + + return true; + } + + bool BalancerPolicy::isDraining( BSONObj limits ){ + BSONElement draining = limits[ ShardFields::draining.name() ]; + if ( draining.eoo() || ! draining.Bool() ){ + return false; + } + + return true; + } + + class PolicyObjUnitTest : public UnitTest { + public: + + typedef ShardFields sf; // convenience alias + + void caseSizeMaxedShard(){ + BSONObj shard0 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) ); + assert( ! BalancerPolicy::isSizeMaxed( shard0 ) ); + + BSONObj shard1 = BSON( sf::maxSize(100LL) << sf::currSize(80LL) ); + assert( ! BalancerPolicy::isSizeMaxed( shard1 ) ); + + BSONObj shard2 = BSON( sf::maxSize(100LL) << sf::currSize(110LL) ); + assert( BalancerPolicy::isSizeMaxed( shard2 ) ); + + BSONObj empty; + assert( ! BalancerPolicy::isSizeMaxed( empty ) ); + } + + void caseDrainingShard(){ + BSONObj shard0 = BSON( sf::draining(true) ); + assert( BalancerPolicy::isDraining( shard0 ) ); + + BSONObj shard1 = BSON( sf::draining(false) ); + assert( ! BalancerPolicy::isDraining( shard1 ) ); + + BSONObj empty; + assert( ! 
BalancerPolicy::isDraining( empty ) ); + } + + void caseBalanceNormal(){ + // 2 chunks and 0 chunk shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunkMap["shard1"] = chunks; + + // no limits + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(false) ); + BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(false) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 1 ); + assert( c != NULL ); + } + + void caseBalanceDraining(){ + // one normal, one draining + // 2 chunks and 0 chunk shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard1"] = chunks; + + // shard0 is draining + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(true) ); + BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(false) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); + assert( c != NULL ); + assert( c->to == "shard1" ); + assert( c->from == "shard0" ); + assert( ! 
c->chunk.isEmpty() );
+        }
+
+        void caseBalanceEndedDraining(){
+            // 2 chunks and 0 chunk (drain completed) shards
+            BalancerPolicy::ShardToChunksMap chunkMap;
+            vector<BSONObj> chunks;
+            chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) <<
+                                   "max" << BSON( "x" << 49 )));
+            chunks.push_back(BSON( "min" << BSON( "x" << 49 ) <<
+                                   "max" << BSON( "x" << BSON( "$maxkey"<<1 ))));
+            chunkMap["shard0"] = chunks;
+            chunks.clear();
+            chunkMap["shard1"] = chunks;
+
+            // no limits
+            BalancerPolicy::ShardToLimitsMap limitsMap;
+            BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(false) );
+            BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(true) );
+            limitsMap["shard0"] = limits0;
+            limitsMap["shard1"] = limits1;
+
+            BalancerPolicy::ChunkInfo* c = NULL;
+            c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 );
+            assert( c == NULL );
+        }
+
+        void caseBalanceImpasse(){
+            // one maxed out, one draining
+            // 2 chunks and 0 chunk shards
+            BalancerPolicy::ShardToChunksMap chunkMap;
+            vector<BSONObj> chunks;
+            chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) <<
+                                   "max" << BSON( "x" << 49 )));
+            chunkMap["shard0"] = chunks;
+            chunks.clear();
+            chunks.push_back(BSON( "min" << BSON( "x" << 49 ) <<
+                                   "max" << BSON( "x" << BSON( "$maxkey"<<1 ))));
+            chunkMap["shard1"] = chunks;
+
+            // shard0 is draining, shard1 is maxed out
+            BalancerPolicy::ShardToLimitsMap limitsMap;
+            BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(true) );
+            BSONObj limits1 = BSON( sf::maxSize(1LL) << sf::currSize(1LL) << sf::draining(false) );
+            limitsMap["shard0"] = limits0;
+            limitsMap["shard1"] = limits1;
+
+            BalancerPolicy::ChunkInfo* c = NULL;
+            c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 );
+            assert( c == NULL );
+        }
+
+        void run(){
+            caseSizeMaxedShard();
+            caseDrainingShard();
+            caseBalanceNormal();
+            caseBalanceDraining();
+            caseBalanceImpasse();
+            log(1) << "policyObjUnitTest passed" << endl;
+        }
+    } policyObjUnitTest;
+
+} // namespace mongo
diff -Nru mongodb-1.4.4/s/balancer_policy.h mongodb-1.6.3/s/balancer_policy.h
--- mongodb-1.4.4/s/balancer_policy.h	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/s/balancer_policy.h	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,84 @@
+// balancer_policy.h
+
+/**
+* Copyright (C) 2010 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef S_BALANCER_POLICY_HEADER
+#define S_BALANCER_POLICY_HEADER
+
+#include "../pch.h"
+
+namespace mongo {
+
+    class BalancerPolicy {
+    public:
+        struct ChunkInfo;
+
+        /**
+         * Returns a suggested chunk to move within a collection's shards, given information about
+         * space usage and number of chunks for that collection. If the policy doesn't recommend
+         * moving, it returns NULL.
+         *
+         * @param ns is the collection's namespace.
+         * @param shardLimitMap is a map from shardId to an object that describes (for now) space
+         *     cap and usage. E.g.: { "maxSize" : <size in MB> , "usedSize" : <size in MB> }.
+         * @param shardToChunksMap is a map from shardId to chunks that live there. A chunk's format
+         *     is { }.
+         * @param balancedLastTime is the number of chunks effectively moved in the last round.
+         * @returns NULL or ChunkInfo of the best move to make towards balancing the collection.
+         */
+        typedef map< string,BSONObj > ShardToLimitsMap;
+        typedef map< string,vector<BSONObj> > ShardToChunksMap;
+        static ChunkInfo* balance( const string& ns, const ShardToLimitsMap& shardToLimitsMap,
+                                   const ShardToChunksMap& shardToChunksMap, int balancedLastTime );
+
+        // below exposed for testing purposes only -- treat it as private --
+
+        static BSONObj pickChunk( const vector<BSONObj>& from, const vector<BSONObj>& to );
+
+        /**
+         * Returns true if a shard cannot receive any new chunks because it has reached 'shardLimits'.
+         * Expects the optional fields "maxSize", cap in size in MB, and "usedSize", currently used size
+         * in MB, on 'shardLimits'.
+         */
+        static bool isSizeMaxed( BSONObj shardLimits );
+
+        /**
+         * Returns true if 'shardLimits' contains a field "draining". Expects the optional field
+         * "draining" on 'shardLimits'.
+         */
+        static bool isDraining( BSONObj shardLimits );
+
+    private:
+        // Convenience types
+        typedef ShardToChunksMap::const_iterator ShardToChunksIter;
+        typedef ShardToLimitsMap::const_iterator ShardToLimitsIter;
+
+    };
+
+    struct BalancerPolicy::ChunkInfo {
+        const string ns;
+        const string to;
+        const string from;
+        const BSONObj chunk;
+
+        ChunkInfo( const string& a_ns , const string& a_to , const string& a_from , const BSONObj& a_chunk )
+            : ns( a_ns ) , to( a_to ) , from( a_from ), chunk( a_chunk ){}
+    };
+
+} // namespace mongo
+
+#endif // S_BALANCER_POLICY_HEADER
diff -Nru mongodb-1.4.4/s/chunk.cpp mongodb-1.6.3/s/chunk.cpp
--- mongodb-1.4.4/s/chunk.cpp	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/chunk.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -16,29 +16,53 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#include "stdafx.h" +#include "pch.h" #include "chunk.h" #include "config.h" +#include "grid.h" #include "../util/unittest.h" #include "../client/connpool.h" +#include "../client/distlock.h" +#include "../db/queryutil.h" #include "cursors.h" #include "strategy.h" namespace mongo { + inline bool allOfType(BSONType type, const BSONObj& o){ + BSONObjIterator it(o); + while(it.more()){ + if (it.next().type() != type) + return false; + } + return true; + } + + RWLock chunkSplitLock("rw:chunkSplitLock"); + // ------- Shard -------- - int Chunk::MaxChunkSize = 1024 * 1204 * 200; + int Chunk::MaxChunkSize = 1024 * 1024 * 200; - Chunk::Chunk( ChunkManager * manager ) : _manager( manager ){ - _modified = false; - _lastmod = 0; - _dataWritten = 0; + Chunk::Chunk( ChunkManager * manager ) + : _manager(manager), + _lastmod(0), _modified(false), _dataWritten(0) + {} + + Chunk::Chunk(ChunkManager * info , const BSONObj& min, const BSONObj& max, const Shard& shard) + : _manager(info), _min(min), _max(max), _shard(shard), + _lastmod(0), _modified(false), _dataWritten(0) + {} + + string Chunk::getns() const { + assert( _manager ); + return _manager->getns(); } - void Chunk::setShard( string s ){ + void Chunk::setShard( const Shard& s ){ _shard = s; - _markModified(); + _manager->_migrationNotification(this); + _modified = true; } bool Chunk::contains( const BSONObj& obj ) const{ @@ -47,18 +71,33 @@ _manager->getShardKey().compare( obj , getMax() ) < 0; } + bool ChunkRange::contains(const BSONObj& obj) const { + // same as Chunk method + return + _manager->getShardKey().compare( getMin() , obj ) <= 0 && + _manager->getShardKey().compare( obj , getMax() ) < 0; + } + + bool Chunk::minIsInf() const { + return _manager->getShardKey().globalMin().woCompare( getMin() ) == 0; + } + + bool Chunk::maxIsInf() const { + return _manager->getShardKey().globalMax().woCompare( getMax() ) == 0; + } + BSONObj Chunk::pickSplitPoint() const{ int sort = 0; - if ( _manager->getShardKey().globalMin().woCompare( getMin() ) == 0 ){ + if ( minIsInf() ){ sort = 1; } - else if ( _manager->getShardKey().globalMax().woCompare( getMax() ) == 0 ){ + else if ( maxIsInf() ){ sort = -1; } if ( sort ){ - ScopedDbConnection conn( getShard() ); + ShardConnection conn( getShard().getConnString() , _manager->getns() ); Query q; if ( sort == 1 ) q.sort( _manager->getShardKey().key() ); @@ -75,212 +114,313 @@ q.sort( r.obj() ); } - BSONObj end = conn->findOne( _ns , q ); + BSONObj end = conn->findOne( _manager->getns() , q ); conn.done(); if ( ! end.isEmpty() ) return _manager->getShardKey().extractKey( end ); } - ScopedDbConnection conn( getShard() ); + BSONObj cmd = BSON( "medianKey" << _manager->getns() + << "keyPattern" << _manager->getShardKey().key() + << "min" << getMin() + << "max" << getMax() ); + + ScopedDbConnection conn( getShard().getConnString() ); BSONObj result; - if ( ! conn->runCommand( "admin" , BSON( "medianKey" << _ns - << "keyPattern" << _manager->getShardKey().key() - << "min" << getMin() - << "max" << getMax() - ) , result ) ){ + if ( ! 
conn->runCommand( "admin" , cmd , result ) ){ stringstream ss; ss << "medianKey command failed: " << result; uassert( 10164 , ss.str() , 0 ); } - BSONObj median = result.getObjectField( "median" ); - if (median == getMin()){ - //TODO compound support - BSONElement key = getMin().firstElement(); - BSONObjBuilder b; - b.appendAs("$gt", key); + BSONObj median = result.getObjectField( "median" ).getOwned(); + conn.done(); + - Query q = QUERY(key.fieldName() << b.obj()); + if (median == getMin()){ + Query q; + q.minKey(_min).maxKey(_max); q.sort(_manager->getShardKey().key()); - median = conn->findOne(_ns, q); + median = conn->findOne(_manager->getns(), q); median = _manager->getShardKey().extractKey( median ); - PRINT(median); } + + if ( median < getMin() || median >= getMax() ){ + stringstream ss; + ss << "medianKey returned value out of range. " + << " cmd: " << cmd + << " result: " << result; + uasserted( 13394 , ss.str() ); + } + + return median; + } + void Chunk::pickSplitVector( vector* splitPoints ) const { + // Ask the mongod holding this chunk to figure out the split points. + ScopedDbConnection conn( getShard().getConnString() ); + BSONObj result; + BSONObjBuilder cmd; + cmd.append( "splitVector" , _manager->getns() ); + cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "maxChunkSize" , Chunk::MaxChunkSize / (1<<20) ); + BSONObj cmdObj = cmd.obj(); + + if ( ! conn->runCommand( "admin" , cmdObj , result )){ + ostringstream os; + os << "splitVector command failed: " << result; + uassert( 13345 , os.str() , 0 ); + } + + BSONObjIterator it( result.getObjectField( "splitKeys" ) ); + while ( it.more() ){ + splitPoints->push_back( it.next().Obj().getOwned() ); + } conn.done(); - - return median.getOwned(); } - Chunk * Chunk::split(){ - return split( pickSplitPoint() ); + ChunkPtr Chunk::split(){ + vector splitPoints; + splitPoints.push_back( pickSplitPoint() ); + return multiSplit( splitPoints ); } - Chunk * Chunk::split( const BSONObj& m ){ - uassert( 10165 , "can't split as shard that doesn't have a manager" , _manager ); + ChunkPtr Chunk::multiSplit( const vector& m ){ + const size_t maxSplitPoints = 256; + + uassert( 10165 , "can't split as shard doesn't have a manager" , _manager ); + uassert( 13332 , "need a split key to split chunk" , !m.empty() ); + uassert( 13333 , "can't split a chunk in that many parts", m.size() < maxSplitPoints ); + uassert( 13003 , "can't split a chunk with only one distinct value" , _min.woCompare(_max) ); + + DistributedLock lockSetup( ConnectionString( modelServer() , ConnectionString::SYNC ) , getns() ); + dist_lock_try dlk( &lockSetup , string("split-") + toString() ); + uassert( 10166 , "locking namespace failed" , dlk.got() ); - log(1) << " before split on: " << m << "\n" - << "\t self : " << toString() << endl; + { + ShardChunkVersion onServer = getVersionOnConfigServer(); + ShardChunkVersion mine = _lastmod; + if ( onServer > mine ){ + stringstream ss; + ss << "mulitSplit failing because config not up to date" + << " onServer: " << onServer.toString() + << " mine: " << mine.toString(); - uassert( 10166 , "locking namespace on server failed" , lockNamespaceOnServer( getShard() , _ns ) ); - uassert( 13003 , "can't split chunk. does it have only one distinct value?" 
, - !m.isEmpty() && _min.woCompare(m) && _max.woCompare(m)); + //reload config + grid.getDBConfig(_manager->_ns)->getChunkManager(_manager->_ns, true); - Chunk * s = new Chunk( _manager ); - s->_ns = _ns; - s->_shard = _shard; - s->setMin(m.getOwned()); - s->setMax(_max); - - s->_markModified(); - _markModified(); - - _manager->_chunks.push_back( s ); - - setMax(m.getOwned()); - - log(1) << " after split:\n" - << "\t left : " << toString() << "\n" - << "\t right: "<< s->toString() << endl; - - - _manager->save(); + uasserted( 13387 , ss.str() ); + } + } + + BSONObjBuilder detail; + appendShortVersion( "before" , detail ); + log(1) << "before split on " << m.size() << " points " << toString() << endl; + + // Iterate over the split points in 'm', splitting off a new chunk per entry. That chunk's range + // covers until the next entry in 'm' or _max . + vector newChunks; + vector::const_iterator i = m.begin(); + BSONObj nextPoint = i->getOwned(); + _modified = true; + do { + BSONObj splitPoint = nextPoint; + log(4) << "splitPoint: " << splitPoint << endl; + nextPoint = (++i != m.end()) ? i->getOwned() : _max.getOwned(); + log(4) << "nextPoint: " << nextPoint << endl; + + if ( nextPoint <= splitPoint) { + stringstream ss; + ss << "multiSplit failing because keys min: " << splitPoint << " and max: " << nextPoint + << " do not define a valid chunk"; + uasserted( 13395, ss.str() ); + } + + ChunkPtr c( new Chunk( _manager, splitPoint , nextPoint , _shard) ); + c->_modified = true; + newChunks.push_back( c ); + } while ( i != m.end() ); + + // Have the chunk manager reflect the key change for the first chunk and create an entry for every + // new chunk spawned by it. + { + rwlock lk( _manager->_lock , true ); + + setMax(m[0].getOwned()); + DEV assert( shared_from_this() ); + _manager->_chunkMap[_max] = shared_from_this(); + + for ( vector::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){ + ChunkPtr s = *it; + _manager->_chunkMap[s->getMax()] = s; + } + } - return s; - } + log(1) << "after split adjusted range: " << toString() << endl; + for ( vector::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){ + ChunkPtr s = *it; + log(1) << "after split created new chunk: " << s->toString() << endl; + } + + // Save the new key boundaries in the configDB. + _manager->save( false ); + + // Log all these changes in the configDB's log. We log a simple split differently than a multi-split. 
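+        // For example, a two-way split is recorded as a single "split" entry
+        // carrying the "before"/"left"/"right" ranges, while a split that spawns
+        // N new chunks is recorded as N+1 "multi-split" entries, each stamped
+        // with "number" (0..N) and "of" (N).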
+ if ( newChunks.size() == 1) { + appendShortVersion( "left" , detail ); + newChunks[0]->appendShortVersion( "right" , detail ); + configServer.logChange( "split" , _manager->getns(), detail.obj() ); + + } else { + BSONObj beforeDetailObj = detail.obj(); + BSONObj firstDetailObj = beforeDetailObj.getOwned(); + const int newChunksSize = newChunks.size(); + + BSONObjBuilder firstDetail; + firstDetail.appendElements( beforeDetailObj ); + firstDetail.append( "number" , 0 ); + firstDetail.append( "of" , newChunksSize ); + appendShortVersion( "chunk" , firstDetail ); + configServer.logChange( "multi-split" , _manager->getns() , firstDetail.obj() ); + + for ( int i=0; i < newChunksSize; i++ ){ + BSONObjBuilder chunkDetail; + chunkDetail.appendElements( beforeDetailObj ); + chunkDetail.append( "number", i+1 ); + chunkDetail.append( "of" , newChunksSize ); + newChunks[i]->appendShortVersion( "chunk" , chunkDetail ); + configServer.logChange( "multi-split" , _manager->getns() , chunkDetail.obj() ); + } + } - bool Chunk::moveAndCommit( const string& to , string& errmsg ){ - uassert( 10167 , "can't move shard to its current location!" , to != getShard() ); + return newChunks[0]; + } - log() << "moving chunk ns: " << _ns << " moving chunk: " << toString() << " " << _shard << " -> " << to << endl; + bool Chunk::moveAndCommit( const Shard& to , string& errmsg ){ + uassert( 10167 , "can't move shard to its current location!" , getShard() != to ); - string from = _shard; - ShardChunkVersion oldVersion = _manager->getVersion( from ); + log() << "moving chunk ns: " << _manager->getns() << " moving ( " << toString() << ") " << _shard.toString() << " -> " << to.toString() << endl; - BSONObj filter; - { - BSONObjBuilder b; - getFilter( b ); - filter = b.obj(); - } + Shard from = _shard; - ScopedDbConnection fromconn( from ); + ScopedDbConnection fromconn( from); - BSONObj startRes; + BSONObj res; bool worked = fromconn->runCommand( "admin" , - BSON( "movechunk.start" << _ns << - "from" << from << - "to" << to << - "filter" << filter + BSON( "moveChunk" << _manager->getns() << + "from" << from.getConnString() << + "to" << to.getConnString() << + "min" << _min << + "max" << _max << + "shardId" << genID() << + "configdb" << configServer.modelServer() ) , - startRes + res ); - if ( ! worked ){ - errmsg = (string)"movechunk.start failed: " + startRes.toString(); - fromconn.done(); - return false; - } - - // update config db - setShard( to ); - - // need to increment version # for old server - Chunk * randomChunkOnOldServer = _manager->findChunkOnServer( from ); - if ( randomChunkOnOldServer ) - randomChunkOnOldServer->_markModified(); - - _manager->save(); - - BSONObj finishRes; - { + fromconn.done(); - ShardChunkVersion newVersion = _manager->getVersion( from ); - if ( newVersion == 0 && oldVersion > 0 ){ - newVersion = oldVersion; - newVersion++; - _manager->save(); - } - else if ( newVersion <= oldVersion ){ - log() << "newVersion: " << newVersion << " oldVersion: " << oldVersion << endl; - uassert( 10168 , "version has to be higher" , newVersion > oldVersion ); - } - - BSONObjBuilder b; - b << "movechunk.finish" << _ns; - b << "to" << to; - b.appendTimestamp( "newVersion" , newVersion ); - b.append( startRes["finishToken"] ); - - worked = fromconn->runCommand( "admin" , - b.done() , - finishRes ); + if ( worked ){ + _manager->_reload(); + return true; } - if ( ! 
worked ){ - errmsg = (string)"movechunk.finish failed: " + finishRes.toString(); - fromconn.done(); - return false; - } - - fromconn.done(); - return true; + errmsg = res["errmsg"].String(); + errmsg += " " + res.toString(); + return false; } bool Chunk::splitIfShould( long dataWritten ){ + LastError::Disabled d( lastError.get() ); + try { + return _splitIfShould( dataWritten ); + } + catch ( std::exception& e ){ + log( LL_ERROR ) << "splitIfShould failed: " << e.what() << endl; + return false; + } + } + + bool Chunk::_splitIfShould( long dataWritten ){ _dataWritten += dataWritten; - if ( _dataWritten < MaxChunkSize / 5 ) + // split faster in early chunks helps spread out an initial load better + int splitThreshold; + const int minChunkSize = 1 << 20; // 1 MBytes + int numChunks = getManager()->numChunks(); + if ( numChunks < 10 ){ + splitThreshold = max( MaxChunkSize / 4 , minChunkSize ); + } else if ( numChunks < 20 ){ + splitThreshold = max( MaxChunkSize / 2 , minChunkSize ); + } else { + splitThreshold = max( MaxChunkSize , minChunkSize ); + } + + if ( minIsInf() || maxIsInf() ){ + splitThreshold = (int) ((double)splitThreshold * .9); + } + + if ( _dataWritten < splitThreshold / 5 ) + return false; + + if ( ! chunkSplitLock.lock_try(0) ) return false; - log(1) << "\t want to split chunk : " << this << endl; + rwlock lk( chunkSplitLock , 1 , true ); + + log(3) << "\t splitIfShould : " << *this << endl; _dataWritten = 0; - BSONObj split_point = pickSplitPoint(); - if ( split_point.isEmpty() || _min == split_point || _max == split_point) { + BSONObj splitPoint = pickSplitPoint(); + if ( splitPoint.isEmpty() || _min == splitPoint || _max == splitPoint) { log() << "SHARD PROBLEM** shard is too big, but can't split: " << toString() << endl; return false; } long size = getPhysicalSize(); - if ( size < MaxChunkSize ) + if ( size < splitThreshold ) return false; - log() << "autosplitting " << _ns << " size: " << size << " shard: " << toString() << endl; - Chunk * newShard = split(split_point); + log() << "autosplitting " << _manager->getns() << " size: " << size << " shard: " << toString() + << " on: " << splitPoint << "(splitThreshold " << splitThreshold << ")" << endl; + + vector splitPoints; + splitPoints.push_back( splitPoint ); + ChunkPtr newShard = multiSplit( splitPoints ); moveIfShould( newShard ); return true; } - bool Chunk::moveIfShould( Chunk * newChunk ){ - Chunk * toMove = 0; + bool Chunk::moveIfShould( ChunkPtr newChunk ){ + ChunkPtr toMove; - if ( newChunk->countObjects() <= 1 ){ + if ( newChunk->countObjects(2) <= 1 ){ toMove = newChunk; } - else if ( this->countObjects() <= 1 ){ - toMove = this; + else if ( this->countObjects(2) <= 1 ){ + DEV assert( shared_from_this() ); + toMove = shared_from_this(); } else { - log(1) << "don't know how to decide if i should move inner shard" << endl; + // moving middle shards is handled by balancer + return false; } - if ( ! toMove ) - return false; + assert( toMove ); - string newLocation = grid.pickShardForNewDB(); - if ( newLocation == getShard() ){ + Shard newLocation = Shard::pick(); + if ( getShard() == newLocation ){ // if this is the best server, then we shouldn't do anything! 
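         // In practice auto-migration only ever relocates a chunk that holds at
         // most one document (a freshly split edge chunk); anything bigger is
         // left to the balancer thread to redistribute.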
- log(1) << "not moving chunk: " << toString() << " b/c would move to same place " << newLocation << " -> " << getShard() << endl; + log(1) << "not moving chunk: " << toString() << " b/c would move to same place " << newLocation.toString() << " -> " << getShard().toString() << endl; return 0; } - log() << "moving chunk (auto): " << toMove->toString() << " to: " << newLocation << " #objcets: " << toMove->countObjects() << endl; + log() << "moving chunk (auto): " << toMove->toString() << " to: " << newLocation.toString() << " #objects: " << toMove->countObjects() << endl; string errmsg; massert( 10412 , (string)"moveAndCommit failed: " + errmsg , @@ -290,32 +430,44 @@ } long Chunk::getPhysicalSize() const{ - ScopedDbConnection conn( getShard() ); + ScopedDbConnection conn( getShard().getConnString() ); BSONObj result; - uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" , BSON( "datasize" << _ns - << "keyPattern" << _manager->getShardKey().key() - << "min" << getMin() - << "max" << getMax() - ) , result ) ); + uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" , + BSON( "datasize" << _manager->getns() + << "keyPattern" << _manager->getShardKey().key() + << "min" << getMin() + << "max" << getMax() + << "maxSize" << ( MaxChunkSize + 1 ) + << "estimate" << true + ) , result ) ); conn.done(); return (long)result["size"].number(); } - - long Chunk::countObjects( const BSONObj& filter ) const{ - ScopedDbConnection conn( getShard() ); - - BSONObj f = getFilter(); - if ( ! filter.isEmpty() ) - f = ClusteredCursor::concatQuery( f , filter ); + int Chunk::countObjects(int maxCount) const { + static const BSONObj fields = BSON("_id" << 1 ); - BSONObj result; - unsigned long long n = conn->count( _ns , f ); + ShardConnection conn( getShard() , _manager->getns() ); + // not using regular count as this is more flexible and supports $min/$max + Query q = Query().minKey(_min).maxKey(_max); + int n; + { + auto_ptr c = conn->query(_manager->getns(), q, maxCount, 0, &fields); + assert( c.get() ); + n = c->itcount(); + } conn.done(); - return (long)n; + return n; + } + + void Chunk::appendShortVersion( const char * name , BSONObjBuilder& b ){ + BSONObjBuilder bb( b.subobjStart( name ) ); + bb.append( "min" , _min ); + bb.append( "max" , _max ); + bb.done(); } bool Chunk::operator==( const Chunk& s ) const{ @@ -325,81 +477,82 @@ ; } - void Chunk::getFilter( BSONObjBuilder& b ) const{ - _manager->_key.getFilter( b , _min , _max ); - } - - void Chunk::serialize(BSONObjBuilder& to){ - if ( _lastmod ) + void Chunk::serialize(BSONObjBuilder& to,ShardChunkVersion myLastMod){ + + to.append( "_id" , genID( _manager->getns() , _min ) ); + + if ( myLastMod.isSet() ){ + to.appendTimestamp( "lastmod" , myLastMod ); + } + else if ( _lastmod.isSet() ){ + assert( _lastmod > 0 && _lastmod < 1000 ); to.appendTimestamp( "lastmod" , _lastmod ); - else - to.appendTimestamp( "lastmod" ); + } + else { + assert(0); + } - to << "ns" << _ns; + to << "ns" << _manager->getns(); to << "min" << _min; to << "max" << _max; - to << "shard" << _shard; + to << "shard" << _shard.getName(); + } + + string Chunk::genID( const string& ns , const BSONObj& o ) { + StringBuilder buf( ns.size() + o.objsize() + 16 ); + buf << ns << "-"; + + BSONObjIterator i(o); + while ( i.more() ){ + BSONElement e = i.next(); + buf << e.fieldName() << "_" << e.toString(false, true); + } + + return buf.str(); } void Chunk::unserialize(const BSONObj& from){ - _ns = from.getStringField( "ns" ); - _shard = from.getStringField( "shard" ); - 
_lastmod = from.hasField( "lastmod" ) ? from["lastmod"]._numberLong() : 0; + string ns = from.getStringField( "ns" ); + _shard.reset( from.getStringField( "shard" ) ); + + _lastmod = from["lastmod"]; + assert( _lastmod > 0 ); BSONElement e = from["minDotted"]; - cout << from << endl; + if (e.eoo()){ _min = from.getObjectField( "min" ).getOwned(); _max = from.getObjectField( "max" ).getOwned(); - } else { // TODO delete this case after giving people a chance to migrate + } + else { // TODO delete this case after giving people a chance to migrate _min = e.embeddedObject().getOwned(); _max = from.getObjectField( "maxDotted" ).getOwned(); } - uassert( 10170 , "Chunk needs a ns" , ! _ns.empty() ); - uassert( 10171 , "Chunk needs a server" , ! _ns.empty() ); + uassert( 10170 , "Chunk needs a ns" , ! ns.empty() ); + uassert( 13327 , "Chunk ns must match server ns" , ns == _manager->getns() ); + + uassert( 10171 , "Chunk needs a server" , _shard.ok() ); uassert( 10172 , "Chunk needs a min" , ! _min.isEmpty() ); uassert( 10173 , "Chunk needs a max" , ! _max.isEmpty() ); } - string Chunk::modelServer() { + string Chunk::modelServer() const { // TODO: this could move around? return configServer.modelServer(); } - void Chunk::_markModified(){ - _modified = true; - // set to 0 so that the config server sets it - _lastmod = 0; - } - - void Chunk::save( bool check ){ - bool reload = ! _lastmod; - Model::save( check ); - if ( reload ){ - // need to do this so that we get the new _lastMod and therefore version number - massert( 10413 , "_id has to be filled in already" , ! _id.isEmpty() ); - - string b = toString(); - BSONObj q = _id.copy(); - massert( 10414 , "how could load fail?" , load( q ) ); - log(2) << "before: " << q << "\t" << b << endl; - log(2) << "after : " << _id << "\t" << toString() << endl; - massert( 10415 , "chunk reload changed content!" , b == toString() ); - massert( 10416 , "id changed!" 
, q["_id"] == _id["_id"] ); - } - } - - void Chunk::ensureIndex(){ - ScopedDbConnection conn( getShard() ); - conn->ensureIndex( _ns , _manager->getShardKey().key() , _manager->_unique ); + ShardChunkVersion Chunk::getVersionOnConfigServer() const { + ScopedDbConnection conn( modelServer() ); + BSONObj o = conn->findOne( ShardNS::chunk , BSON( "_id" << genID() ) ); conn.done(); + return o["lastmod"]; } string Chunk::toString() const { stringstream ss; - ss << "shard ns:" << _ns << " shard: " << _shard << " min: " << _min << " max: " << _max; + ss << "ns:" << _manager->getns() << " at: " << _shard.toString() << " lastmod: " << _lastmod.toString() << " min: " << _min << " max: " << _max; return ss.str(); } @@ -410,139 +563,291 @@ // ------- ChunkManager -------- - unsigned long long ChunkManager::NextSequenceNumber = 1; + AtomicUInt ChunkManager::NextSequenceNumber = 1; ChunkManager::ChunkManager( DBConfig * config , string ns , ShardKeyPattern pattern , bool unique ) : - _config( config ) , _ns( ns ) , _key( pattern ) , _unique( unique ){ - Chunk temp(0); + _config( config ) , _ns( ns ) , + _key( pattern ) , _unique( unique ) , + _sequenceNumber( ++NextSequenceNumber ), _lock("rw:ChunkManager") + { + _reload_inlock(); + + if ( _chunkMap.empty() ){ + ChunkPtr c( new Chunk(this, _key.globalMin(), _key.globalMax(), config->getPrimary()) ); + c->setModified( true ); + + _chunkMap[c->getMax()] = c; + _chunkRanges.reloadAll(_chunkMap); + + _shards.insert(c->getShard()); + + save_inlock( true ); + log() << "no chunks for:" << ns << " so creating first: " << c->toString() << endl; + } + } + + ChunkManager::~ChunkManager(){ + _chunkMap.clear(); + _chunkRanges.clear(); + _shards.clear(); + } + + void ChunkManager::_reload(){ + rwlock lk( _lock , true ); + _reload_inlock(); + } + + void ChunkManager::_reload_inlock(){ + int tries = 3; + while (tries--){ + _chunkMap.clear(); + _chunkRanges.clear(); + _shards.clear(); + _load(); + + if (_isValid()){ + _chunkRanges.reloadAll(_chunkMap); + return; + } + + if (_chunkMap.size() < 10){ + _printChunks(); + } + sleepmillis(10 * (3-tries)); + sleepsecs(10); + } + msgasserted(13282, "Couldn't load a valid config for " + _ns + " after 3 tries. Giving up"); + + } + + void ChunkManager::_load(){ + static Chunk temp(0); ScopedDbConnection conn( temp.modelServer() ); - auto_ptr cursor = conn->query( temp.getNS() , BSON( "ns" << ns ) ); + + auto_ptr cursor = conn->query(temp.getNS(), QUERY("ns" << _ns).sort("lastmod",1), 0, 0, 0, 0, + (DEBUG_BUILD ? 2 : 1000000)); // batch size. 
Try to induce potential race conditions in debug builds + assert( cursor.get() ); while ( cursor->more() ){ BSONObj d = cursor->next(); if ( d["isMaxMarker"].trueValue() ){ continue; } - - Chunk * c = new Chunk( this ); + + ChunkPtr c( new Chunk( this ) ); c->unserialize( d ); - _chunks.push_back( c ); - c->_id = d["_id"].wrap().getOwned(); + + _chunkMap[c->getMax()] = c; + _shards.insert(c->getShard()); + } conn.done(); - - if ( _chunks.size() == 0 ){ - Chunk * c = new Chunk( this ); - c->_ns = ns; - c->setMin(_key.globalMin()); - c->setMax(_key.globalMax()); - c->_shard = config->getPrimary(); - c->_markModified(); - - _chunks.push_back( c ); - - log() << "no chunks for:" << ns << " so creating first: " << c->toString() << endl; + } + + bool ChunkManager::_isValid() const { +#define ENSURE(x) do { if(!(x)) { log() << "ChunkManager::_isValid failed: " #x << endl; return false; } } while(0) + + if (_chunkMap.empty()) + return true; + + // Check endpoints + ENSURE(allOfType(MinKey, _chunkMap.begin()->second->getMin())); + ENSURE(allOfType(MaxKey, prior(_chunkMap.end())->second->getMax())); + + // Make sure there are no gaps or overlaps + for (ChunkMap::const_iterator it=boost::next(_chunkMap.begin()), end=_chunkMap.end(); it != end; ++it){ + ChunkMap::const_iterator last = prior(it); + + if (!(it->second->getMin() == last->second->getMax())){ + PRINT(it->second->toString()); + PRINT(it->second->getMin()); + PRINT(last->second->getMax()); + } + ENSURE(it->second->getMin() == last->second->getMax()); } - _sequenceNumber = ++NextSequenceNumber; + return true; + +#undef ENSURE } - - ChunkManager::~ChunkManager(){ - for ( vector::iterator i=_chunks.begin(); i != _chunks.end(); i++ ){ - delete( *i ); + + void ChunkManager::_printChunks() const { + for (ChunkMap::const_iterator it=_chunkMap.begin(), end=_chunkMap.end(); it != end; ++it) { + log() << *it->second << endl; } - _chunks.clear(); } bool ChunkManager::hasShardKey( const BSONObj& obj ){ return _key.hasShardKey( obj ); } - Chunk& ChunkManager::findChunk( const BSONObj & obj ){ + ChunkPtr ChunkManager::findChunk( const BSONObj & obj , bool retry ){ + BSONObj key = _key.extractKey(obj); - for ( vector::iterator i=_chunks.begin(); i != _chunks.end(); i++ ){ - Chunk * c = *i; - if ( c->contains( obj ) ) - return *c; + { + rwlock lk( _lock , false ); + + BSONObj foo; + ChunkPtr c; + { + ChunkMap::iterator it = _chunkMap.upper_bound(key); + if (it != _chunkMap.end()){ + foo = it->first; + c = it->second; + } + } + + if ( c ){ + if ( c->contains( obj ) ) + return c; + + PRINT(foo); + PRINT(*c); + PRINT(key); + + _reload_inlock(); + massert(13141, "Chunk map pointed to incorrect chunk", false); + } } - stringstream ss; - ss << "couldn't find a chunk which should be impossible extracted: " << _key.extractKey( obj ); - throw UserException( 8070 , ss.str() ); - } - Chunk* ChunkManager::findChunkOnServer( const string& server ) const { + if ( retry ){ + stringstream ss; + ss << "couldn't find a chunk aftry retry which should be impossible extracted: " << key; + throw UserException( 8070 , ss.str() ); + } + + log() << "ChunkManager: couldn't find chunk for: " << key << " going to retry" << endl; + _reload_inlock(); + return findChunk( obj , true ); + } - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk * c = *i; - if ( c->getShard() == server ) + ChunkPtr ChunkManager::findChunkOnServer( const Shard& shard ) const { + rwlock lk( _lock , false ); + + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); 
++i ){ + ChunkPtr c = i->second; + if ( c->getShard() == shard ) return c; } - return 0; + return ChunkPtr(); } - int ChunkManager::getChunksForQuery( vector& chunks , const BSONObj& query ){ - int added = 0; - - for ( vector::iterator i=_chunks.begin(); i != _chunks.end(); i++ ){ - Chunk * c = *i; - if ( _key.relevantForQuery( query , c ) ){ - chunks.push_back( c ); - added++; + void ChunkManager::getShardsForQuery( set& shards , const BSONObj& query ){ + rwlock lk( _lock , false ); + DEV PRINT(query); + + //TODO look into FieldRangeSetOr + FieldRangeOrSet fros(_ns.c_str(), query, false); + uassert(13088, "no support for special queries yet", fros.getSpecial().empty()); + + do { + boost::scoped_ptr frs (fros.topFrs()); + { + // special case if most-significant field isn't in query + FieldRange range = frs->range(_key.key().firstElement().fieldName()); + if ( !range.nontrivial() ){ + DEV PRINT(range.nontrivial()); + getAllShards(shards); + return; + } + } + + BoundList ranges = frs->indexBounds(_key.key(), 1); + for (BoundList::const_iterator it=ranges.begin(), end=ranges.end(); it != end; ++it){ + BSONObj minObj = it->first.replaceFieldNames(_key.key()); + BSONObj maxObj = it->second.replaceFieldNames(_key.key()); + + DEV PRINT(minObj); + DEV PRINT(maxObj); + + ChunkRangeMap::const_iterator min, max; + min = _chunkRanges.upper_bound(minObj); + max = _chunkRanges.upper_bound(maxObj); + + assert(min != _chunkRanges.ranges().end()); + + // make max non-inclusive like end iterators + if(max != _chunkRanges.ranges().end()) + ++max; + + for (ChunkRangeMap::const_iterator it=min; it != max; ++it){ + shards.insert(it->second->getShard()); + } + + // once we know we need to visit all shards no need to keep looping + //if (shards.size() == _shards.size()) + //return; } + + if (fros.moreOrClauses()) + fros.popOrClause(); + + } while (fros.moreOrClauses()); + } + + void ChunkManager::getShardsForRange(set& shards, const BSONObj& min, const BSONObj& max){ + uassert(13405, "min must have shard key", hasShardKey(min)); + uassert(13406, "max must have shard key", hasShardKey(max)); + + ChunkRangeMap::const_iterator it = _chunkRanges.upper_bound(min); + ChunkRangeMap::const_iterator end = _chunkRanges.lower_bound(max); + + for (; it!=end; ++ it){ + shards.insert(it->second->getShard()); + + // once we know we need to visit all shards no need to keep looping + if (shards.size() == _shards.size()) + break; } - return added; } - void ChunkManager::getAllServers( set& allServers ){ - for ( vector::iterator i=_chunks.begin(); i != _chunks.end(); i++ ){ - allServers.insert( (*i)->getShard() ); - } + void ChunkManager::getAllShards( set& all ){ + rwlock lk( _lock , false ); + all.insert(_shards.begin(), _shards.end()); } - void ChunkManager::ensureIndex(){ - set seen; - - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk * c = *i; - if ( seen.count( c->getShard() ) ) - continue; - seen.insert( c->getShard() ); - c->ensureIndex(); + void ChunkManager::ensureIndex_inlock(){ + //TODO in parallel? 
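+        // e.g. for a collection sharded on { x : 1 } this issues an
+        // ensureIndex( ns, { x : 1 }, _unique ) against each shard in turn,
+        // one connection at a time.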
+ for ( set::const_iterator i=_shards.begin(); i!=_shards.end(); ++i ){ + ScopedDbConnection conn( i->getConnString() ); + conn->ensureIndex( getns() , getShardKey().key() , _unique ); + conn.done(); } } - void ChunkManager::drop(){ + void ChunkManager::drop( ChunkManagerPtr me ){ + rwlock lk( _lock , true ); + + configServer.logChange( "dropCollection.start" , _ns , BSONObj() ); + + DistributedLock lockSetup( ConnectionString( configServer.modelServer() , ConnectionString::SYNC ) , getns() ); + dist_lock_try dlk( &lockSetup , "drop" ); + uassert( 13331 , "locking namespace failed" , dlk.got() ); + uassert( 10174 , "config servers not all up" , configServer.allUp() ); - map seen; + set seen; log(1) << "ChunkManager::drop : " << _ns << endl; // lock all shards so no one can do a split/migrate - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk * c = *i; - ShardChunkVersion& version = seen[ c->getShard() ]; - if ( version ) - continue; - version = lockNamespaceOnServer( c->getShard() , _ns ); - if ( version ) - continue; - - // rollback - uassert( 10175 , "don't know how to rollback locks b/c drop can't lock all shards" , 0 ); + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + ChunkPtr c = i->second; + seen.insert( c->getShard() ); } log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl; // wipe my meta-data - _chunks.clear(); + _chunkMap.clear(); + _chunkRanges.clear(); + _shards.clear(); // delete data from mongod - for ( map::iterator i=seen.begin(); i!=seen.end(); i++ ){ - string shard = i->first; - ScopedDbConnection conn( shard ); + for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ){ + ScopedDbConnection conn( *i ); conn->dropCollection( _ns ); conn.done(); } @@ -551,18 +856,16 @@ // clean up database meta-data uassert( 10176 , "no sharding data?" , _config->removeSharding( _ns ) ); - _config->save(); - // remove chunk data - Chunk temp(0); + static Chunk temp(0); ScopedDbConnection conn( temp.modelServer() ); conn->remove( temp.getNS() , BSON( "ns" << _ns ) ); conn.done(); log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl; - for ( map::iterator i=seen.begin(); i!=seen.end(); i++ ){ - ScopedDbConnection conn( i->first ); + for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ){ + ScopedDbConnection conn( *i ); BSONObj res; if ( ! setShardVersion( conn.conn() , _ns , 0 , true , res ) ) throw UserException( 8071 , (string)"OH KNOW, cleaning up after drop failed: " + res.toString() ); @@ -571,50 +874,159 @@ log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl; + configServer.logChange( "dropCollection" , _ns , BSONObj() ); } - void ChunkManager::save(){ - ShardChunkVersion a = getVersion(); + void ChunkManager::save( bool major ){ + rwlock lk( _lock , true ); + save_inlock( major ); + } + + void ChunkManager::save_inlock( bool major ){ + + ShardChunkVersion a = getVersion_inlock(); + assert( a > 0 || _chunkMap.size() <= 1 ); + ShardChunkVersion nextChunkVersion = a; + nextChunkVersion.inc( major ); + + vector toFix; + vector newVersions; - set withRealChunks; + BSONObjBuilder cmdBuilder; + BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) ); - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk* c = *i; - if ( ! c->_modified ) + + int numOps = 0; + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + ChunkPtr c = i->second; + if ( ! 
c->getModified() ) continue; - c->save( true ); + + numOps++; _sequenceNumber = ++NextSequenceNumber; - withRealChunks.insert( c->getShard() ); + ShardChunkVersion myVersion = nextChunkVersion; + nextChunkVersion.incMinor(); + toFix.push_back( c ); + newVersions.push_back( myVersion ); + + BSONObjBuilder op; + op.append( "op" , "u" ); + op.appendBool( "b" , true ); + op.append( "ns" , ShardNS::chunk ); + + BSONObjBuilder n( op.subobjStart( "o" ) ); + c->serialize( n , myVersion ); + n.done(); + + BSONObjBuilder q( op.subobjStart( "o2" ) ); + q.append( "_id" , c->genID() ); + q.done(); + + updates.append( op.obj() ); + } + + if ( numOps == 0 ) + return; + + updates.done(); + + if ( a > 0 || _chunkMap.size() > 1 ){ + BSONArrayBuilder temp( cmdBuilder.subarrayStart( "preCondition" ) ); + BSONObjBuilder b; + b.append( "ns" , ShardNS::chunk ); + b.append( "q" , BSON( "query" << BSON( "ns" << _ns ) << "orderby" << BSON( "lastmod" << -1 ) ) ); + { + BSONObjBuilder bb( b.subobjStart( "res" ) ); + bb.appendTimestamp( "lastmod" , a ); + bb.done(); + } + temp.append( b.obj() ); + temp.done(); } + + BSONObj cmd = cmdBuilder.obj(); + + log(7) << "ChunkManager::save update: " << cmd << endl; - massert( 10417 , "how did version get smalled" , getVersion() >= a ); + ScopedDbConnection conn( Chunk(0).modelServer() ); + BSONObj res; + bool ok = conn->runCommand( "config" , cmd , res ); + conn.done(); + + if ( ! ok ){ + stringstream ss; + ss << "saving chunks failed. cmd: " << cmd << " result: " << res; + log( LL_ERROR ) << ss.str() << endl; + msgasserted( 13327 , ss.str() ); + } - ensureIndex(); // TODO: this is too aggressive - but not really sooo bad + for ( unsigned i=0; i_lastmod = newVersions[i]; + toFix[i]->setModified( false ); + } + + massert( 10417 , "how did version get smalled" , getVersion_inlock() >= a ); + + ensureIndex_inlock(); // TODO: this is too aggressive - but not really sooo bad } - ShardChunkVersion ChunkManager::getVersion( const string& server ) const{ + void ChunkManager::maybeChunkCollection() { + uassert( 13346 , "can't pre-split already splitted collection" , (_chunkMap.size() == 1) ); + + ChunkPtr soleChunk = _chunkMap.begin()->second; + vector splitPoints; + soleChunk->pickSplitVector( &splitPoints ); + if ( splitPoints.empty() ){ + log(1) << "not enough data to warrant chunking " << getns() << endl; + return; + } + + soleChunk->multiSplit( splitPoints ); + } + + ShardChunkVersion ChunkManager::getVersionOnConfigServer() const { + static Chunk temp(0); + + ScopedDbConnection conn( temp.modelServer() ); + + auto_ptr cursor = conn->query(temp.getNS(), QUERY("ns" << _ns).sort("lastmod",1), 1 ); + assert( cursor.get() ); + BSONObj o; + if ( cursor->more() ) + o = cursor->next(); + conn.done(); + + return o["lastmod"]; + } + + ShardChunkVersion ChunkManager::getVersion( const Shard& shard ) const{ + rwlock lk( _lock , false ); // TODO: cache or something? 
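         // The version reported for a shard is just the highest lastmod among its
         // chunks; e.g. a shard whose chunks are stamped 3|0 and 3|2 reports 3|2.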
ShardChunkVersion max = 0; - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk* c = *i; - if ( c->getShard() != server ) + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + ChunkPtr c = i->second; + DEV assert( c ); + if ( c->getShard() != shard ) continue; - if ( c->_lastmod > max ) max = c->_lastmod; } - return max; } ShardChunkVersion ChunkManager::getVersion() const{ + rwlock lk( _lock , false ); + return getVersion_inlock(); + } + + ShardChunkVersion ChunkManager::getVersion_inlock() const{ ShardChunkVersion max = 0; - - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - Chunk* c = *i; + + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + ChunkPtr c = i->second; if ( c->_lastmod > max ) max = c->_lastmod; } @@ -623,27 +1035,207 @@ } string ChunkManager::toString() const { + rwlock lk( _lock , false ); + stringstream ss; - ss << "ChunkManager: " << _ns << " key:" << _key.toString() << "\n"; - for ( vector::const_iterator i=_chunks.begin(); i!=_chunks.end(); i++ ){ - const Chunk* c = *i; - ss << "\t" << c->toString() << "\n"; + ss << "ChunkManager: " << _ns << " key:" << _key.toString() << '\n'; + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + const ChunkPtr c = i->second; + ss << "\t" << c->toString() << '\n'; } return ss.str(); } + + void ChunkManager::_migrationNotification(Chunk* c){ + _chunkRanges.reloadRange(_chunkMap, c->getMin(), c->getMax()); + _shards.insert(c->getShard()); + } + + void ChunkRangeManager::assertValid() const{ + if (_ranges.empty()) + return; + + try { + // No Nulls + for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){ + assert(it->second); + } + + // Check endpoints + assert(allOfType(MinKey, _ranges.begin()->second->getMin())); + assert(allOfType(MaxKey, prior(_ranges.end())->second->getMax())); + + // Make sure there are no gaps or overlaps + for (ChunkRangeMap::const_iterator it=boost::next(_ranges.begin()), end=_ranges.end(); it != end; ++it){ + ChunkRangeMap::const_iterator last = prior(it); + assert(it->second->getMin() == last->second->getMax()); + } + + // Check Map keys + for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){ + assert(it->first == it->second->getMax()); + } + + // Make sure we match the original chunks + const ChunkMap chunks = _ranges.begin()->second->getManager()->_chunkMap; + for ( ChunkMap::const_iterator i=chunks.begin(); i!=chunks.end(); ++i ){ + const ChunkPtr chunk = i->second; + + ChunkRangeMap::const_iterator min = _ranges.upper_bound(chunk->getMin()); + ChunkRangeMap::const_iterator max = _ranges.lower_bound(chunk->getMax()); + + assert(min != _ranges.end()); + assert(max != _ranges.end()); + assert(min == max); + assert(min->second->getShard() == chunk->getShard()); + assert(min->second->contains( chunk->getMin() )); + assert(min->second->contains( chunk->getMax() ) || (min->second->getMax() == chunk->getMax())); + } + + } catch (...) { + log( LL_ERROR ) << "\t invalid ChunkRangeMap! 
printing ranges:" << endl; + + for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it) + cout << it->first << ": " << *it->second << endl; + + throw; + } + } + + void ChunkRangeManager::reloadRange(const ChunkMap& chunks, const BSONObj& min, const BSONObj& max){ + if (_ranges.empty()){ + reloadAll(chunks); + return; + } + + ChunkRangeMap::iterator low = _ranges.upper_bound(min); + ChunkRangeMap::iterator high = _ranges.lower_bound(max); + + assert(low != _ranges.end()); + assert(high != _ranges.end()); + assert(low->second); + assert(high->second); + + ChunkMap::const_iterator begin = chunks.upper_bound(low->second->getMin()); + ChunkMap::const_iterator end = chunks.lower_bound(high->second->getMax()); + + assert(begin != chunks.end()); + assert(end != chunks.end()); + + // C++ end iterators are one-past-last + ++high; + ++end; + + // update ranges + _ranges.erase(low, high); // invalidates low + _insertRange(begin, end); + + assert(!_ranges.empty()); + DEV assertValid(); + + // merge low-end if possible + low = _ranges.upper_bound(min); + assert(low != _ranges.end()); + if (low != _ranges.begin()){ + shared_ptr a = prior(low)->second; + shared_ptr b = low->second; + if (a->getShard() == b->getShard()){ + shared_ptr cr (new ChunkRange(*a, *b)); + _ranges.erase(prior(low)); + _ranges.erase(low); // invalidates low + _ranges[cr->getMax()] = cr; + } + } + + DEV assertValid(); + + // merge high-end if possible + high = _ranges.lower_bound(max); + if (high != prior(_ranges.end())){ + shared_ptr a = high->second; + shared_ptr b = boost::next(high)->second; + if (a->getShard() == b->getShard()){ + shared_ptr cr (new ChunkRange(*a, *b)); + _ranges.erase(boost::next(high)); + _ranges.erase(high); //invalidates high + _ranges[cr->getMax()] = cr; + } + } + + DEV assertValid(); + } + + void ChunkRangeManager::reloadAll(const ChunkMap& chunks){ + _ranges.clear(); + _insertRange(chunks.begin(), chunks.end()); + + DEV assertValid(); + } + + void ChunkRangeManager::_insertRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end){ + while (begin != end){ + ChunkMap::const_iterator first = begin; + Shard shard = first->second->getShard(); + while (begin != end && (begin->second->getShard() == shard)) + ++begin; + + shared_ptr cr (new ChunkRange(first, begin)); + _ranges[cr->getMax()] = cr; + } + } class ChunkObjUnitTest : public UnitTest { public: void runShard(){ - + ChunkPtr c; + assert( ! 
c ); + c.reset( new Chunk( 0 ) ); + assert( c ); } + void runShardChunkVersion(){ + vector<ShardChunkVersion> all; + all.push_back( ShardChunkVersion(1,1) ); + all.push_back( ShardChunkVersion(1,2) ); + all.push_back( ShardChunkVersion(2,1) ); + all.push_back( ShardChunkVersion(2,2) ); + + for ( unsigned i=0; i -#undef assert -#define assert xassert +#include "shard.h" +#include "config.h" +#include "util.h" namespace mongo { - + class DBConfig; + class Chunk; + class ChunkRange; class ChunkManager; + class ChunkRangeManager; class ChunkObjUnitTest; - typedef unsigned long long ShardChunkVersion; + typedef shared_ptr<Chunk> ChunkPtr; + + // key is max for each Chunk or ChunkRange + typedef map<BSONObj,ChunkPtr,BSONObjCmp> ChunkMap; + typedef map<BSONObj,shared_ptr<ChunkRange>,BSONObjCmp> ChunkRangeMap; /** config.chunks @@ -46,10 +54,11 @@ x is in a shard iff min <= x < max */ - class Chunk : public Model , boost::noncopyable { + class Chunk : boost::noncopyable, public boost::enable_shared_from_this<Chunk> { public: Chunk( ChunkManager * info ); + Chunk( ChunkManager * info , const BSONObj& min, const BSONObj& max, const Shard& shard); const BSONObj& getMin() const { return _min; } const BSONObj& getMax() const { return _max; } @@ -61,15 +70,17 @@ _max = o; } - string getShard() const{ - return _shard; - } - void setShard( string shard ); + + string getns() const; + Shard getShard() const { return _shard; } + + void setShard( const Shard& shard ); bool contains( const BSONObj& obj ) const; string toString() const; - operator string() const { return toString(); } + + friend ostream& operator << (ostream& out, const Chunk& c){ return (out << c.toString()); } bool operator==(const Chunk& s) const; @@ -77,13 +88,15 @@ return ! ( *this == s ); } - void getFilter( BSONObjBuilder& b ) const; - BSONObj getFilter() const{ BSONObjBuilder b; getFilter( b ); return b.obj(); } - + // if min/max key is pos/neg infinity + bool minIsInf() const; + bool maxIsInf() const; BSONObj pickSplitPoint() const; - Chunk * split(); - Chunk * split( const BSONObj& middle ); + ChunkPtr split(); + + void pickSplitVector( vector<BSONObj>* splitPoints ) const; + ChunkPtr multiSplit( const vector<BSONObj>& splitPoints ); /** * @return size of shard in bytes + */ long getPhysicalSize() const; - long countObjects( const BSONObj& filter = BSONObj() ) const; + int countObjects(int maxcount=0) const; /** * if the amount of data written nears the max size of a shard + */ bool splitIfShould( long dataWritten ); - /* * moves either this shard or newShard if it makes sense to * @return whether or not a shard was moved */ - bool moveIfShould( Chunk * newShard = 0 ); - - bool moveAndCommit( const string& to , string& errmsg ); + bool moveIfShould( ChunkPtr newShard = ChunkPtr() ); - virtual const char * getNS(){ return "config.chunks"; } - virtual void serialize(BSONObjBuilder& to); - virtual void unserialize(const BSONObj& from); - virtual string modelServer(); + bool moveAndCommit( const Shard& to , string& errmsg ); - virtual void save( bool check=false ); - - void ensureIndex(); - - void _markModified(); + const char * getNS(){ return "config.chunks"; } + void serialize(BSONObjBuilder& to, ShardChunkVersion myLastMod=0); + void unserialize(const BSONObj& from); + string modelServer() const; + void appendShortVersion( const char * name , BSONObjBuilder& b ); + static int MaxChunkSize; - private: + string genID() const; + static string genID( const string& ns , const BSONObj& min ); + + const ChunkManager* getManager() const { return _manager; } + bool getModified() { return _modified; } + void
setModified( bool modified ) { _modified = modified; } + + ShardChunkVersion getVersionOnConfigServer() const; + private: + + bool _splitIfShould( long dataWritten ); + // main shard info ChunkManager * _manager; ShardKeyPattern skey() const; - string _ns; BSONObj _min; BSONObj _max; - string _shard; + Shard _shard; ShardChunkVersion _lastmod; bool _modified; @@ -139,7 +157,7 @@ // transient stuff long _dataWritten; - + // methods, etc.. void _split( BSONObj& middle ); @@ -148,6 +166,78 @@ friend class ShardObjUnitTest; }; + class ChunkRange{ + public: + const ChunkManager* getManager() const{ return _manager; } + Shard getShard() const{ return _shard; } + + const BSONObj& getMin() const { return _min; } + const BSONObj& getMax() const { return _max; } + + // clones of Chunk methods + bool contains(const BSONObj& obj) const; + + ChunkRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end) + : _manager(begin->second->getManager()) + , _shard(begin->second->getShard()) + , _min(begin->second->getMin()) + , _max(prior(end)->second->getMax()) + { + assert( begin != end ); + + DEV while (begin != end){ + assert(begin->second->getManager() == _manager); + assert(begin->second->getShard() == _shard); + ++begin; + } + } + + // Merge min and max (must be adjacent ranges) + ChunkRange(const ChunkRange& min, const ChunkRange& max) + : _manager(min.getManager()) + , _shard(min.getShard()) + , _min(min.getMin()) + , _max(max.getMax()) + { + assert(min.getShard() == max.getShard()); + assert(min.getManager() == max.getManager()); + assert(min.getMax() == max.getMin()); + } + + friend ostream& operator<<(ostream& out, const ChunkRange& cr){ + return (out << "ChunkRange(min=" << cr._min << ", max=" << cr._max << ", shard=" << cr._shard <<")"); + } + + private: + const ChunkManager* _manager; + const Shard _shard; + const BSONObj _min; + const BSONObj _max; + }; + + + class ChunkRangeManager { + public: + const ChunkRangeMap& ranges() const { return _ranges; } + + void clear() { _ranges.clear(); } + + void reloadAll(const ChunkMap& chunks); + void reloadRange(const ChunkMap& chunks, const BSONObj& min, const BSONObj& max); + + // Slow operation -- wrap with DEV + void assertValid() const; + + ChunkRangeMap::const_iterator upper_bound(const BSONObj& o) const { return _ranges.upper_bound(o); } + ChunkRangeMap::const_iterator lower_bound(const BSONObj& o) const { return _ranges.lower_bound(o); } + + private: + // assumes nothing in this range exists in _ranges + void _insertRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end); + + ChunkRangeMap _ranges; + }; + /* config.sharding { ns: 'alleyinsider.fs.chunks' , key: { ts : 1 } , @@ -160,62 +250,90 @@ ChunkManager( DBConfig * config , string ns , ShardKeyPattern pattern , bool unique ); virtual ~ChunkManager(); - string getns(){ - return _ns; - } + string getns() const { return _ns; } - int numChunks(){ return _chunks.size(); } - Chunk* getChunk( int i ){ return _chunks[i]; } + int numChunks() const { rwlock lk( _lock , false ); return _chunkMap.size(); } bool hasShardKey( const BSONObj& obj ); - Chunk& findChunk( const BSONObj& obj ); - Chunk* findChunkOnServer( const string& server ) const; + ChunkPtr findChunk( const BSONObj& obj , bool retry = false ); + ChunkPtr findChunkOnServer( const Shard& shard ) const; ShardKeyPattern& getShardKey(){ return _key; } + const ShardKeyPattern& getShardKey() const { return _key; } bool isUnique(){ return _unique; } - - /** - * makes sure the shard index is on all servers - */ - void 
ensureIndex(); - - /** - * @return number of Chunk added to the vector - */ - int getChunksForQuery( vector& chunks , const BSONObj& query ); - void getAllServers( set& allServers ); + void maybeChunkCollection(); + + void getShardsForQuery( set& shards , const BSONObj& query ); + void getAllShards( set& all ); + void getShardsForRange(set& shards, const BSONObj& min, const BSONObj& max); // [min, max) - void save(); + void save( bool major ); string toString() const; - operator string() const { return toString(); } - ShardChunkVersion getVersion( const string& server ) const; + ShardChunkVersion getVersion( const Shard& shard ) const; ShardChunkVersion getVersion() const; + /** + * actually does a query on the server + * doesn't look at any local data + */ + ShardChunkVersion getVersionOnConfigServer() const; + /** * this is just an increasing number of how many ChunkManagers we have so we know if something has been updated */ unsigned long long getSequenceNumber(){ return _sequenceNumber; } + + void getInfo( BSONObjBuilder& b ){ + b.append( "key" , _key.key() ); + b.appendBool( "unique" , _unique ); + } + + /** + * @param me - so i don't get deleted before i'm done + */ + void drop( ChunkManagerPtr me ); - void drop(); + void _printChunks() const; private: + + void _reload(); + void _reload_inlock(); + void _load(); + + void save_inlock( bool major ); + ShardChunkVersion getVersion_inlock() const; + void ensureIndex_inlock(); + DBConfig * _config; string _ns; ShardKeyPattern _key; bool _unique; - vector _chunks; map _maxMarkers; + ChunkMap _chunkMap; + ChunkRangeManager _chunkRanges; + + set _shards; + unsigned long long _sequenceNumber; + mutable RWLock _lock; + + // This should only be called from Chunk after it has been migrated + void _migrationNotification(Chunk* c); + friend class Chunk; - static unsigned long long NextSequenceNumber; + friend class ChunkRangeManager; // only needed for CRM::assertValid() + static AtomicUInt NextSequenceNumber; + + bool _isValid() const; }; // like BSONObjCmp. 
for use as an STL comparison functor @@ -226,14 +344,30 @@ bool operator()( const Chunk &l, const Chunk &r ) const { return _cmp(l.getMin(), r.getMin()); } + bool operator()( const ptr l, const ptr r ) const { + return operator()(*l, *r); + } - bool operator()( const Chunk *l, const Chunk *r ) const { + // Also support ChunkRanges + bool operator()( const ChunkRange &l, const ChunkRange &r ) const { + return _cmp(l.getMin(), r.getMin()); + } + bool operator()( const shared_ptr l, const shared_ptr r ) const { return operator()(*l, *r); } private: BSONObjCmp _cmp; }; - + /* + struct chunk_lock { + chunk_lock( const Chunk* c ){ + + } + + Chunk _c; + }; + */ + inline string Chunk::genID() const { return genID(_manager->getns(), _min); } } // namespace mongo diff -Nru mongodb-1.4.4/s/commands_admin.cpp mongodb-1.6.3/s/commands_admin.cpp --- mongodb-1.4.4/s/commands_admin.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/commands_admin.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -25,38 +25,40 @@ _ secondary indexes */ -#include "stdafx.h" +#include "pch.h" #include "../util/message.h" -#include "../db/dbmessage.h" +#include "../util/processinfo.h" +#include "../util/stringutils.h" + #include "../client/connpool.h" + +#include "../db/dbmessage.h" #include "../db/commands.h" +#include "../db/stats/counters.h" #include "config.h" #include "chunk.h" +#include "grid.h" #include "strategy.h" +#include "stats.h" namespace mongo { - extern string ourHostname; - namespace dbgrid_cmds { - set dbgridCommands; - class GridAdminCmd : public Command { public: - GridAdminCmd( const char * n ) : Command( n ){ - dbgridCommands.insert( n ); + GridAdminCmd( const char * n ) : Command( n , false, tolowerString(n).c_str() ){ } - virtual bool slaveOk(){ + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return true; } // all grid commands are designed not to lock - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } }; // --------------- misc commands ---------------------- @@ -67,64 +69,131 @@ virtual void help( stringstream& help ) const { help << " shows status/reachability of servers in the cluster"; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - result.append("configserver", configServer.getPrimary() ); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + result.append("configserver", configServer.getPrimary().getConnString() ); result.append("isdbgrid", 1); return true; } } netstat; - - class ListGridCommands : public GridAdminCmd { + + class ServerStatusCmd : public Command { public: - ListGridCommands() : GridAdminCmd("gridcommands") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + ServerStatusCmd() : Command( "serverStatus" , true ){ + _started = time(0); + } + + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return NONE; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + result.append("uptime",(double) (time(0)-_started)); + result.appendDate( "localTime" , jsTime() ); - BSONObjBuilder arr; - int num=0; - for ( set::iterator i = dbgridCommands.begin(); i != dbgridCommands.end(); i++ ){ - string s = BSONObjBuilder::numStr( num++ ); - arr.append( s.c_str() , *i ); + { + BSONObjBuilder t( result.subobjStart( "mem" ) ); + + ProcessInfo p; + if ( p.supported() ){ + 
t.appendNumber( "resident" , p.getResidentSize() ); + t.appendNumber( "virtual" , p.getVirtualMemorySize() ); + t.appendBool( "supported" , true ); + } + else { + result.append( "note" , "not all mem info support on this platform" ); + t.appendBool( "supported" , false ); + } + + t.done(); } - result.appendArray( "commands" , arr.done() ); - return true; + { + BSONObjBuilder bb( result.subobjStart( "connections" ) ); + bb.append( "current" , connTicketHolder.used() ); + bb.append( "available" , connTicketHolder.available() ); + bb.done(); + } + + { + BSONObjBuilder bb( result.subobjStart( "extra_info" ) ); + bb.append("note", "fields vary by platform"); + ProcessInfo p; + p.getExtraInfo(bb); + bb.done(); + } + + result.append( "opcounters" , globalOpCounters.getObj() ); + { + BSONObjBuilder bb( result.subobjStart( "ops" ) ); + bb.append( "sharded" , opsSharded.getObj() ); + bb.append( "notSharded" , opsNonSharded.getObj() ); + bb.done(); + } + + result.append( "shardCursorType" , shardedCursorTypes.getObj() ); + + { + BSONObjBuilder asserts( result.subobjStart( "asserts" ) ); + asserts.append( "regular" , assertionCount.regular ); + asserts.append( "warning" , assertionCount.warning ); + asserts.append( "msg" , assertionCount.msg ); + asserts.append( "user" , assertionCount.user ); + asserts.append( "rollovers" , assertionCount.rollovers ); + asserts.done(); + } + + return 1; } - } listGridCommands; - // ------------ database level commands ------------- + time_t _started; + } cmdServerStatus; - class ListDatabaseCommand : public GridAdminCmd { + class FsyncCommand : public GridAdminCmd { public: - ListDatabaseCommand() : GridAdminCmd("listdatabases") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - ScopedDbConnection conn( configServer.getPrimary() ); + FsyncCommand() : GridAdminCmd( "fsync" ){} + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + if ( cmdObj["lock"].trueValue() ){ + errmsg = "can't do lock through mongos"; + return false; + } + + BSONObjBuilder sub; - auto_ptr cursor = conn->query( "config.databases" , BSONObj() ); + bool ok = true; + int numFiles = 0; + + vector shards; + Shard::getAllShards( shards ); + for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ){ + Shard s = *i; - BSONObjBuilder list; - int num = 0; - while ( cursor->more() ){ - string s = BSONObjBuilder::numStr( num++ ); + BSONObj x = s.runCommand( "admin" , "fsync" ); + sub.append( s.getName() , x ); - BSONObj o = cursor->next(); - list.append( s.c_str() , o["name"].valuestrsafe() ); + if ( ! x["ok"].trueValue() ){ + ok = false; + errmsg = x["errmsg"].String(); + } + + numFiles += x["numFiles"].numberInt(); } - - result.appendArray("databases" , list.obj() ); - conn.done(); - - return true; + + result.append( "numFiles" , numFiles ); + result.append( "all" , sub.obj() ); + return ok; } - } gridListDatabase; + } fsyncCmd; + + // ------------ database level commands ------------- class MoveDatabasePrimaryCommand : public GridAdminCmd { public: - MoveDatabasePrimaryCommand() : GridAdminCmd("moveprimary") { } + MoveDatabasePrimaryCommand() : GridAdminCmd("movePrimary") { } virtual void help( stringstream& help ) const { - help << " example: { moveprimary : 'foo' , to : 'localhost:9999' } TODO: locking? "; + help << " example: { moveprimary : 'foo' , to : 'localhost:9999' }"; + // TODO: locking? 
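Stepping out of the help text for a moment: the fsync command above is the simplest instance of the per-shard aggregation shape this patch uses repeatedly: run the command on every shard, keep each raw reply under the shard's name, and fold scalar fields into totals. A minimal self-contained sketch of that shape follows; the Reply struct and runOnShard() are invented stand-ins, and only the fold logic mirrors the diff.

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Invented stand-in for one shard's reply to { fsync : 1 }.
    struct Reply { bool ok; int numFiles; };

    // Invented stand-in for Shard::runCommand( "admin" , "fsync" ).
    Reply runOnShard( const std::string& shard ) { return Reply{ true , 3 }; }

    int main() {
        std::vector<std::string> shards = { "shard0000" , "shard0001" };

        bool ok = true;
        int numFiles = 0;
        std::map<std::string, Reply> all;       // per-shard raw replies (the "all" subobject)

        for ( size_t i = 0; i < shards.size(); i++ ) {
            Reply r = runOnShard( shards[i] );  // fan out
            all[ shards[i] ] = r;               // keep the raw reply for the caller
            ok = ok && r.ok;                    // any shard failure fails the command
            numFiles += r.numFiles;             // fold scalars into a total
        }

        std::cout << "ok: " << ok << ", numFiles: " << numFiles << std::endl;
        return 0;
    }

RunOnAllShardsCommand in commands_public.cpp later in this patch generalizes the same loop with futures; movePrimary's implementation resumes below.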
} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbname = cmdObj["moveprimary"].valuestrsafe(); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string dbname = cmdObj.firstElement().valuestrsafe(); if ( dbname.size() == 0 ){ errmsg = "no db"; @@ -136,7 +205,7 @@ return false; } - DBConfig * config = grid.getDBConfig( dbname , false ); + DBConfigPtr config = grid.getDBConfig( dbname , false ); if ( ! config ){ errmsg = "can't find db!"; return false; @@ -147,71 +216,68 @@ errmsg = "you have to specify where you want to move it"; return false; } + Shard s = Shard::make( to ); - if ( to == config->getPrimary() ){ + if ( config->getPrimary() == s.getConnString() ){ errmsg = "thats already the primary"; return false; } - if ( ! grid.knowAboutShard( to ) ){ + if ( ! grid.knowAboutShard( s.getConnString() ) ){ errmsg = "that server isn't known to me"; return false; } - - ScopedDbConnection conn( configServer.getPrimary() ); - - log() << "moving " << dbname << " primary from: " << config->getPrimary() << " to: " << to << endl; + + log() << "movePrimary: moving " << dbname << " primary from: " << config->getPrimary().toString() + << " to: " << s.toString() << endl; // TODO LOCKING: this is not safe with multiple mongos + ScopedDbConnection toconn( s.getConnString() ); - ScopedDbConnection toconn( to ); - - // TODO AARON - we need a clone command which replays operations from clone start to now - // using a seperate smaller oplog + // TODO ERH - we need a clone command which replays operations from clone start to now + // can just use local.oplog.$main BSONObj cloneRes; - bool worked = toconn->runCommand( dbname.c_str() , BSON( "clone" << config->getPrimary() ) , cloneRes ); + bool worked = toconn->runCommand( dbname.c_str() , BSON( "clone" << config->getPrimary().getConnString() ) , cloneRes ); toconn.done(); + if ( ! worked ){ log() << "clone failed" << cloneRes << endl; errmsg = "clone failed"; - conn.done(); return false; } ScopedDbConnection fromconn( config->getPrimary() ); - config->setPrimary( to ); - config->save( true ); + config->setPrimary( s.getConnString() ); - log() << " dropping " << dbname << " from old" << endl; + log() << "movePrimary: dropping " << dbname << " from old" << endl; fromconn->dropDatabase( dbname.c_str() ); fromconn.done(); - result << "primary" << to; + result << "primary " << s.toString(); - conn.done(); return true; } } movePrimary; class EnableShardingCmd : public GridAdminCmd { public: - EnableShardingCmd() : GridAdminCmd( "enablesharding" ){} + EnableShardingCmd() : GridAdminCmd( "enableSharding" ){} virtual void help( stringstream& help ) const { help << "Enable sharding for a db. 
(Use 'shardcollection' command afterwards.)\n" << " { enablesharding : \"<dbname>\" }\n"; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbname = cmdObj["enablesharding"].valuestrsafe(); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string dbname = cmdObj.firstElement().valuestrsafe(); if ( dbname.size() == 0 ){ errmsg = "no db"; return false; } - DBConfig * config = grid.getDBConfig( dbname ); + DBConfigPtr config = grid.getDBConfig( dbname ); if ( config->isShardingEnabled() ){ errmsg = "already enabled"; return false; @@ -220,7 +286,6 @@ log() << "enabling sharding on: " << dbname << endl; config->enableSharding(); - config->save( true ); return true; } @@ -230,20 +295,22 @@ class ShardCollectionCmd : public GridAdminCmd { public: - ShardCollectionCmd() : GridAdminCmd( "shardcollection" ){} + ShardCollectionCmd() : GridAdminCmd( "shardCollection" ){} + virtual void help( stringstream& help ) const { help << "Shard a collection. Requires key. Optional unique. Sharding must already be enabled for the database.\n" << " { enablesharding : \"<dbname>\" }\n"; } - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string ns = cmdObj["shardcollection"].valuestrsafe(); + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } - DBConfig * config = grid.getDBConfig( ns ); + DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isShardingEnabled() ){ errmsg = "sharding not enabled for db"; return false; @@ -258,23 +325,60 @@ if ( key.isEmpty() ){ errmsg = "no shard key"; return false; - } else if (key.nFields() > 1){ - errmsg = "compound shard keys not supported yet"; - return false; + } + + BSONForEach(e, key){ + if (!e.isNumber() || e.number() != 1.0){ + errmsg = "shard keys must all be ascending"; + return false; + } } if ( ns.find( ".system." ) != string::npos ){ errmsg = "can't shard system namespaces"; return false; } - + + // Sharding interacts with indexing in at least two ways: + // + // 1. A unique index must have the sharding key as its prefix. Otherwise maintaining uniqueness would + // require coordinated access to all shards. Trying to shard a collection with such an index is not + // allowed. + // + // 2. Sharding a collection requires an index over the sharding key. That index must be created upfront. + // The rationale is that sharding a non-empty collection would need to create the index and that could + // be slow. Requiring the index upfront allows the admin to plan before sharding and perhaps use + // background index construction. One exception to the rule: empty collections. It's fairly easy to + // create the index as part of the sharding process. + // + // We enforce both these conditions in what comes next. + { + ShardKeyPattern proposedKey( key ); + bool hasShardIndex = false; + ScopedDbConnection conn( config->getPrimary() ); BSONObjBuilder b; b.append( "ns" , ns ); - b.appendBool( "unique" , true ); - if ( conn->count( config->getName() + ".system.indexes" , b.obj() ) ){ - errmsg = "can't shard collection with unique indexes"; + + auto_ptr<DBClientCursor> cursor = conn->query( config->getName() + ".system.indexes" , b.obj() ); + while ( cursor->more() ){ + BSONObj idx = cursor->next(); + + // Is the index key over the sharding key? Remember that.
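To make the prefix rule from the comment block above concrete: a shard key { a : 1 } is a prefix of a unique index { a : 1 , b : 1 }, so that pair is allowed, while a unique index { b : 1 , a : 1 } would be rejected. The sketch below is a field-name-only stand-in for ShardKeyPattern::isPrefixOf, which actually compares BSON key patterns; it is purely illustrative.

    #include <cassert>
    #include <string>
    #include <vector>

    // Illustrative stand-in: every field of the proposed shard key must
    // appear, in order, at the front of the candidate index key.
    static bool isPrefixOfSketch( const std::vector<std::string>& shardKey ,
                                  const std::vector<std::string>& indexKey ) {
        if ( shardKey.size() > indexKey.size() )
            return false;
        for ( size_t i = 0; i < shardKey.size(); i++ )
            if ( shardKey[i] != indexKey[i] )
                return false;
        return true;
    }

    int main() {
        assert(   isPrefixOfSketch( { "a" } , { "a" , "b" } ) );   // unique index allowed
        assert( ! isPrefixOfSketch( { "a" } , { "b" , "a" } ) );   // would block sharding
        return 0;
    }

In the loop itself, the first test below records whether this index is exactly the proposed shard key (satisfying condition 2); the prefix test then only has to run for unique indexes.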
+ if ( key.woCompare( idx["key"].embeddedObjectUserCheck() ) == 0 ){ + hasShardIndex = true; + } + + // Not a unique index? Move on. + if ( idx["unique"].eoo() || ! idx["unique"].Bool() ) + continue; + + // Shard key is prefix of unique index? Move on. + if ( proposedKey.isPrefixOf( idx["key"].embeddedObjectUserCheck() ) ) + continue; + + errmsg = (string)"can't shard collection with unique index on: " + idx.toString(); conn.done(); return false; } @@ -286,13 +390,17 @@ return false; } + if ( ! hasShardIndex && ( conn->count( ns ) != 0 ) ){ + errmsg = "please create an index over the sharding key before sharding."; + return false; + } + conn.done(); } - - log() << "CMD: shardcollection: " << cmdObj << endl; + + tlog() << "CMD: shardcollection: " << cmdObj << endl; config->shardCollection( ns , key , cmdObj["unique"].trueValue() ); - config->save( true ); result << "collectionsharded" << ns; return true; @@ -306,26 +414,26 @@ help << " example: { getShardVersion : 'alleyinsider.foo' } "; } - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string ns = cmdObj["getShardVersion"].valuestrsafe(); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "need to specify full namespace"; return false; } - DBConfig * config = grid.getDBConfig( ns ); + DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded."; return false; } - ChunkManager * cm = config->getChunkManager( ns ); + ChunkManagerPtr cm = config->getChunkManager( ns ); if ( ! cm ){ errmsg = "no chunk manager?"; return false; } - - result.appendTimestamp( "version" , cm->getVersion() ); + cm->_printChunks(); + result.appendTimestamp( "version" , cm->getVersion().toLong() ); return 1; } @@ -336,22 +444,24 @@ SplitCollectionHelper( const char * name ) : GridAdminCmd( name ) , _name( name ){} virtual void help( stringstream& help ) const { help - << " example: { shard : 'alleyinsider.blog.posts' , find : { ts : 1 } } - split the shard that contains give key \n" - << " example: { shard : 'alleyinsider.blog.posts' , middle : { ts : 1 } } - split the shard that contains the key with this as the middle \n" + << " example: { split : 'alleyinsider.blog.posts' , find : { ts : 1 } } - split the shard that contains the given key \n" + << " example: { split : 'alleyinsider.blog.posts' , middle : { ts : 1 } } - split the shard that contains the key with this as the middle \n" << " NOTE: this does not move the chunks, it merely creates a logical separation \n" ; } - virtual bool _split( BSONObjBuilder& result , string&errmsg , const string& ns , ChunkManager * manager , Chunk& old , BSONObj middle ) = 0; + virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ) = 0; - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string ns = cmdObj[_name.c_str()].valuestrsafe(); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + ShardConnection::sync(); + + string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } - DBConfig * config = grid.getDBConfig( ns ); + DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded.
have to shard before can split"; return false; @@ -367,8 +477,8 @@ } } - ChunkManager * info = config->getChunkManager( ns ); - Chunk& old = info->findChunk( find ); + ChunkManagerPtr info = config->getChunkManager( ns ); + ChunkPtr old = info->findChunk( find ); return _split( result , errmsg , ns , info , old , cmdObj.getObjectField( "middle" ) ); } @@ -379,15 +489,15 @@ class SplitValueCommand : public SplitCollectionHelper { public: - SplitValueCommand() : SplitCollectionHelper( "splitvalue" ){} - virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManager * manager , Chunk& old , BSONObj middle ){ + SplitValueCommand() : SplitCollectionHelper( "splitValue" ){} + virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ){ - result << "shardinfo" << old.toString(); + result << "shardinfo" << old->toString(); result.appendBool( "auto" , middle.isEmpty() ); if ( middle.isEmpty() ) - middle = old.pickSplitPoint(); + middle = old->pickSplitPoint(); result.append( "middle" , middle ); @@ -400,14 +510,17 @@ class SplitCollection : public SplitCollectionHelper { public: SplitCollection() : SplitCollectionHelper( "split" ){} - virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManager * manager , Chunk& old , BSONObj middle ){ - + virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ){ + assert( old.get() ); log() << "splitting: " << ns << " shard: " << old << endl; if ( middle.isEmpty() ) - old.split(); - else - old.split( middle ); + old->split(); + else { + vector splitPoints; + splitPoints.push_back( middle ); + old->multiSplit( splitPoints ); + } return true; } @@ -417,18 +530,21 @@ class MoveChunkCmd : public GridAdminCmd { public: - MoveChunkCmd() : GridAdminCmd( "movechunk" ){} + MoveChunkCmd() : GridAdminCmd( "moveChunk" ){} virtual void help( stringstream& help ) const { help << "{ movechunk : 'test.foo' , find : { num : 1 } , to : 'localhost:30001' }"; } - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string ns = cmdObj["movechunk"].valuestrsafe(); + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + ShardConnection::sync(); + + Timer t; + string ns = cmdObj.firstElement().valuestrsafe(); if ( ns.size() == 0 ){ errmsg = "no ns"; return false; } - DBConfig * config = grid.getDBConfig( ns ); + DBConfigPtr config = grid.getDBConfig( ns ); if ( ! config->isSharded( ns ) ){ errmsg = "ns not sharded. have to shard before can move a chunk"; return false; @@ -440,31 +556,29 @@ return false; } - string to = cmdObj["to"].valuestrsafe(); - if ( ! to.size() ){ + string toString = cmdObj["to"].valuestrsafe(); + if ( ! toString.size() ){ errmsg = "you have to specify where you want to move the chunk"; return false; } - log() << "CMD: movechunk: " << cmdObj << endl; + Shard to = Shard::make( toString ); + + tlog() << "CMD: movechunk: " << cmdObj << endl; - ChunkManager * info = config->getChunkManager( ns ); - Chunk& c = info->findChunk( find ); - string from = c.getShard(); + ChunkManagerPtr info = config->getChunkManager( ns ); + ChunkPtr c = info->findChunk( find ); + const Shard& from = c->getShard(); if ( from == to ){ errmsg = "that chunk is already on that shard"; return false; } - - if ( ! 
grid.knowAboutShard( to ) ){ - errmsg = "that shard isn't known to me"; - return false; - } - - if ( ! c.moveAndCommit( to , errmsg ) ) + + if ( ! c->moveAndCommit( to , errmsg ) ) return false; + result.append( "millis" , t.millis() ); return true; } } moveChunkCmd; @@ -473,11 +587,11 @@ class ListShardsCmd : public GridAdminCmd { public: - ListShardsCmd() : GridAdminCmd("listshards") { } + ListShardsCmd() : GridAdminCmd("listShards") { } virtual void help( stringstream& help ) const { help << "list all shards of the system"; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ ScopedDbConnection conn( configServer.getPrimary() ); vector<BSONObj> all; @@ -497,86 +611,139 @@ /* a shard is a single mongod server or a replica pair. add it (them) to the cluster as a storage partition. */ class AddShard : public GridAdminCmd { public: - AddShard() : GridAdminCmd("addshard") { } + AddShard() : GridAdminCmd("addShard") { } virtual void help( stringstream& help ) const { help << "add a new shard to the system"; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - ScopedDbConnection conn( configServer.getPrimary() ); - - - string host = cmdObj["addshard"].valuestrsafe(); - - if ( host == "localhost" || host.find( "localhost:" ) == 0 || - host == "127.0.0.1" || host.find( "127.0.0.1:" ) == 0 ){ - if ( ! cmdObj["allowLocal"].trueValue() ){ - errmsg = - "can't use localhost as a shard since all shards need to communicate. " - "allowLocal to override for testing"; + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + errmsg.clear(); + + // get replica set component hosts + ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg ); + if ( ! errmsg.empty() ){ + log() << "addshard request " << cmdObj << " failed:" << errmsg << endl; + return false; + } + + // using localhost in server names implies every other process must use localhost addresses too + vector<HostAndPort> serverAddrs = servers.getServers(); + for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ){ + if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ){ + errmsg = "can't use localhost as a shard since all shards need to communicate. " + "either use all shards and configdbs in localhost or all in actual IPs " ; + log() << "addshard request " << cmdObj << " failed: attempt to mix localhosts and IPs" << endl; return false; } - } - - if ( host.find( ":" ) == string::npos ){ - stringstream ss; - ss << host << ":" << CmdLine::ShardServerPort; - host = ss.str(); - } - BSONObj shard; - { - BSONObjBuilder b; - b.append( "host" , host ); - if ( cmdObj["maxSize"].isNumber() ) - b.append( cmdObj["maxSize"] ); - shard = b.obj(); + // it's fine if mongods of a set all use default port + if ( ! serverAddrs[i].hasPort() ){ + serverAddrs[i].setPort( CmdLine::ShardServerPort ); + } } - BSONObj old = conn->findOne( "config.shards" , shard ); - if ( !
old.isEmpty() ){ - result.append( "msg" , "already exists" ); - conn.done(); - return false; - } + // name is optional; addShard will provide one if needed + string name = ""; + if ( cmdObj["name"].type() == String ) { + name = cmdObj["name"].valuestrsafe(); + } - try { - ScopedDbConnection newShardConn( host ); - newShardConn->getLastError(); - newShardConn.done(); - } - catch ( DBException& e ){ - errmsg = "couldn't connect to new shard"; - result.append( "host" , host ); - result.append( "exception" , e.what() ); - conn.done(); - return false; + // maxSize is the space usage cap in a shard in MBs + long long maxSize = 0; + if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ){ + maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong(); } - + if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ){ + log() << "addshard request " << cmdObj << " failed: " << errmsg << endl; + return false; + } - conn->insert( "config.shards" , shard ); - result.append( "added" , shard["host"].valuestrsafe() ); - conn.done(); + result << "shardAdded" << name; return true; } + } addServer; + /* See usage docs at: + * http://www.mongodb.org/display/DOCS/Configuring+Sharding#ConfiguringSharding-Removingashard + */ class RemoveShardCmd : public GridAdminCmd { public: - RemoveShardCmd() : GridAdminCmd("removeshard") { } + RemoveShardCmd() : GridAdminCmd("removeShard") { } virtual void help( stringstream& help ) const { - help << "remove a shard to the system.\nshard must be empty or command will return an error."; + help << "remove a shard from the system."; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - if ( 1 ){ - errmsg = "removeshard not yet implemented"; - return 0; + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string target = cmdObj.firstElement().valuestrsafe(); + Shard s = Shard::make( target ); + if ( ! grid.knowAboutShard( s.getConnString() ) ){ + errmsg = "unknown shard"; + return false; } ScopedDbConnection conn( configServer.getPrimary() ); - BSONObj server = BSON( "host" << cmdObj["removeshard"].valuestrsafe() ); - conn->remove( "config.shards" , server ); + // If the server is not yet draining chunks, put it in draining mode. + BSONObj searchDoc = BSON( "_id" << s.getName() ); + BSONObj drainingDoc = BSON( "_id" << s.getName() << ShardFields::draining(true) ); + BSONObj shardDoc = conn->findOne( "config.shards", drainingDoc ); + if ( shardDoc.isEmpty() ){ + + // TODO: prevent moving chunks to this shard. + + log() << "going to start draining shard: " << s.getName() << endl; + BSONObj newStatus = BSON( "$set" << BSON( ShardFields::draining(true) ) ); + conn->update( "config.shards" , searchDoc , newStatus, false /* do not upsert */); + + errmsg = conn->getLastError(); + if ( errmsg.size() ){ + log() << "error starting remove shard: " << s.getName() << " err: " << errmsg << endl; + return false; + } + + Shard::reloadShardInfo(); + + result.append( "msg" , "draining started successfully" ); + result.append( "state" , "started" ); + result.append( "shard" , s.getName() ); + conn.done(); + return true; + } + + // If the server has been completely drained, remove it from the ConfigDB. + // Check not only for chunks but also databases.
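Taken together, removeShard is a small state machine that the administrator drives by re-issuing the command: not yet draining reports "started", draining with chunks or primary databases left reports "ongoing", and both counts at zero reports "completed". A hedged condensation of that decision follows; the function name and signature are invented for illustration, and the real code interleaves the config-server queries shown in this hunk.

    #include <iostream>
    #include <string>

    // Invented condensation of removeShard's three outcomes.
    std::string removeShardState( bool draining , long long chunks , long long dbs ) {
        if ( ! draining )
            return "started";     // first call: set the draining flag
        if ( chunks == 0 && dbs == 0 )
            return "completed";   // fully drained: delete the shard document
        return "ongoing";         // otherwise report the remaining work
    }

    int main() {
        std::cout << removeShardState( false , 12 , 1 ) << '\n';   // started
        std::cout << removeShardState( true  ,  4 , 1 ) << '\n';   // ongoing
        std::cout << removeShardState( true  ,  0 , 0 ) << '\n';   // completed
        return 0;
    }

The two counts that decide the "completed" case are taken straight from config.chunks and config.databases: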
+ BSONObj shardIDDoc = BSON( "shard" << shardDoc[ "_id" ].str() ); + long long chunkCount = conn->count( "config.chunks" , shardIDDoc ); + BSONObj primaryDoc = BSON( "primary" << shardDoc[ "_id" ].str() ); + long long dbCount = conn->count( "config.databases" , primaryDoc ); + if ( ( chunkCount == 0 ) && ( dbCount == 0 ) ){ + log() << "going to remove shard: " << s.getName() << endl; + conn->remove( "config.shards" , searchDoc ); + + errmsg = conn->getLastError(); + if ( errmsg.size() ){ + log() << "error concluding remove shard: " << s.getName() << " err: " << errmsg << endl; + return false; + } + + Shard::removeShard( shardDoc[ "_id" ].str() ); + Shard::reloadShardInfo(); + + result.append( "msg" , "removeshard completed successfully" ); + result.append( "state" , "completed" ); + result.append( "shard" , s.getName() ); + conn.done(); + return true; + } + + // If the server is already in draining mode, just report on its progress. + // Report on databases (not just chunks) that are left too. + result.append( "msg" , "draining ongoing" ); + result.append( "state" , "ongoing" ); + BSONObjBuilder inner; + inner.append( "chunks" , chunkCount ); + inner.append( "dbs" , dbCount ); + result.append( "remaining" , inner.obj() ); conn.done(); return true; @@ -588,48 +755,71 @@ class IsDbGridCmd : public Command { public: - virtual LockType locktype(){ return NONE; } - virtual bool slaveOk() { + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } IsDbGridCmd() : Command("isdbgrid") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { result.append("isdbgrid", 1); - result.append("hostname", ourHostname); + result.append("hostname", getHostNameCached()); return true; } } isdbgrid; class CmdIsMaster : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "test if this is master half of a replica pair"; } CmdIsMaster() : Command("ismaster") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { result.append("ismaster", 1.0 ); result.append("msg", "isdbgrid"); return true; } } ismaster; + class CmdWhatsMyUri : public Command { + public: + CmdWhatsMyUri() : Command("whatsmyuri") { } + virtual bool logTheOp() { + return false; // the modification will be logged directly + } + virtual bool slaveOk() const { + return true; + } + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { + return false; + } + virtual void help( stringstream &help ) const { + help << "{whatsmyuri:1}"; + } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + result << "you" << ClientInfo::get()->getRemote(); + return true; + } + } cmdWhatsMyUri; + + class CmdShardingGetPrevError : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "get 
previous error (since last reseterror command)"; } - CmdShardingGetPrevError() : Command("getpreverror") { } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + CmdShardingGetPrevError() : Command( "getPrevError" , false , "getpreverror") { } + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { errmsg += "getpreverror not supported for sharded environments"; return false; } @@ -637,20 +827,41 @@ class CmdShardingGetLastError : public Command { public: - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool slaveOk() { + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << "check for an error on the last command executed"; } - CmdShardingGetLastError() : Command("getlasterror") { } - virtual bool run(const char *nsraw, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { - string dbName = nsraw; - dbName = dbName.substr( 0 , dbName.size() - 5 ); + CmdShardingGetLastError() : Command("getLastError" , false , "getlasterror") { } + + void addWriteBack( vector& all , const BSONObj& o ){ + BSONElement e = o["writeback"]; + + if ( e.type() == jstOID ) + all.push_back( e.OID() ); + } + + void handleWriteBacks( vector& all ){ + if ( all.size() == 0 ) + return; - DBConfig * conf = grid.getDBConfig( dbName , false ); + for ( unsigned i=0; imsg.size() && le->nPrev == 1 ){ + le->appendSelf( result ); + return true; + } + } ClientInfo * client = ClientInfo::get(); set * shards = client->getPrev(); @@ -659,30 +870,79 @@ result.appendNull( "err" ); return true; } + + //log() << "getlasterror enter: " << shards->size() << endl; + + + vector writebacks; + // handle single server if ( shards->size() == 1 ){ string theShard = *(shards->begin() ); result.append( "theshard" , theShard.c_str() ); - ScopedDbConnection conn( theShard ); + ShardConnection conn( theShard , "" ); BSONObj res; - bool ok = conn->runCommand( conf->getName() , cmdObj , res ); + bool ok = conn->runCommand( dbName , cmdObj , res ); + //log() << "\t" << res << endl; result.appendElements( res ); conn.done(); + result.append( "singleShard" , theShard ); + addWriteBack( writebacks , res ); + + // hit other machines just to block + for ( set::const_iterator i=client->sinceLastGetError().begin(); i!=client->sinceLastGetError().end(); ++i ){ + string temp = *i; + if ( temp == theShard ) + continue; + + ShardConnection conn( temp , "" ); + addWriteBack( writebacks , conn->getLastErrorDetailed() ); + conn.done(); + } + client->clearSinceLastGetError(); + handleWriteBacks( writebacks ); return ok; } + BSONArrayBuilder bbb( result.subarrayStart( "shards" ) ); + + long long n = 0; + + // hit each shard vector errors; for ( set::iterator i = shards->begin(); i != shards->end(); i++ ){ string theShard = *i; - ScopedDbConnection conn( theShard ); - string temp = conn->getLastError(); - if ( temp.size() ) + bbb.append( theShard ); + ShardConnection conn( theShard , "" ); + BSONObj res; + bool ok = conn->runCommand( dbName , cmdObj , res ); + addWriteBack( writebacks, res ); + string temp = DBClientWithCommands::getLastErrorString( res ); + if ( ok == false || temp.size() ) errors.push_back( temp ); + n += res["n"].numberLong(); conn.done(); } + bbb.done(); + + result.appendNumber( "n" , n ); + + // hit other machines just to block + for ( set::const_iterator i=client->sinceLastGetError().begin(); 
i!=client->sinceLastGetError().end(); ++i ){ + string temp = *i; + if ( shards->count( temp ) ) + continue; + + ShardConnection conn( temp , "" ); + addWriteBack( writebacks, conn->getLastErrorDetailed() ); + conn.done(); + } + client->clearSinceLastGetError(); + if ( errors.size() == 0 ){ result.appendNull( "err" ); + handleWriteBacks( writebacks ); return true; } @@ -690,13 +950,102 @@ BSONObjBuilder all; for ( unsigned i=0; i shards; + Shard::getAllShards( shards ); + + map sizes; + map< string,shared_ptr > dbShardInfo; + + for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ){ + Shard s = *i; + BSONObj x = s.runCommand( "admin" , "listDatabases" ); + + BSONObjIterator j( x["databases"].Obj() ); + while ( j.more() ){ + BSONObj theDB = j.next().Obj(); + + string name = theDB["name"].String(); + long long size = theDB["sizeOnDisk"].numberLong(); + + long long& totalSize = sizes[name]; + if ( size == 1 ){ + if ( totalSize <= 1 ) + totalSize = 1; + } + else + totalSize += size; + + shared_ptr& bb = dbShardInfo[name]; + if ( ! bb.get() ) + bb.reset( new BSONObjBuilder() ); + bb->appendNumber( s.getName() , size ); + } + + } + + long long totalSize = 0; + + BSONArrayBuilder bb( result.subarrayStart( "databases" ) ); + for ( map::iterator i=sizes.begin(); i!=sizes.end(); ++i ){ + string name = i->first; + long long size = i->second; + totalSize += size; + + BSONObjBuilder temp; + temp.append( "name" , name ); + temp.appendNumber( "size" , size ); + temp.appendBool( "empty" , size == 1 ); + temp.append( "shards" , dbShardInfo[name]->obj() ); + + bb.append( temp.obj() ); + } + bb.done(); + + result.appendNumber( "totalSize" , totalSize ); + result.appendNumber( "totalSizeMb" , totalSize / ( 1024 * 1024 ) ); + + return 1; + } + + } cmdListDatabases; + + class CmdCloseAllDatabases : public Command { + public: + CmdCloseAllDatabases() : Command("closeAllDatabases", false , "closeAllDatabases" ) {} + virtual bool logTheOp() { return false; } + virtual bool slaveOk() const { return true; } + virtual bool slaveOverrideOk() { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream& help ) const { help << "Not supported sharded"; } + + bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) { + errmsg = "closeAllDatabases isn't supported through mongos"; + return false; + } + } cmdCloseAllDatabases; + } // namespace mongo diff -Nru mongodb-1.4.4/s/commands_public.cpp mongodb-1.6.3/s/commands_public.cpp --- mongodb-1.4.4/s/commands_public.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/commands_public.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -17,16 +17,18 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "../util/message.h" #include "../db/dbmessage.h" #include "../client/connpool.h" #include "../client/parallel.h" #include "../db/commands.h" +#include "../db/query.h" #include "config.h" #include "chunk.h" #include "strategy.h" +#include "grid.h" namespace mongo { @@ -34,32 +36,109 @@ class PublicGridCommand : public Command { public: - PublicGridCommand( const char * n ) : Command( n ){ + PublicGridCommand( const char* n, const char* oldname=NULL ) : Command( n, false, oldname ){ } - virtual bool slaveOk(){ + virtual bool slaveOk() const { return true; } - virtual bool adminOnly() { + virtual bool adminOnly() const { return false; } // all grid commands are designed not to lock - virtual LockType locktype(){ return NONE; } + virtual LockType locktype() const { return NONE; } protected: - string getDBName( string ns ){ - return ns.substr( 0 , ns.size() - 5 ); - } + bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + return _passthrough(conf->getName(), conf, cmdObj, result); + } + bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + return _passthrough("admin", conf, cmdObj, result); + } - bool passthrough( DBConfig * conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ - ScopedDbConnection conn( conf->getPrimary() ); + private: + bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + ShardConnection conn( conf->getPrimary() , "" ); BSONObj res; - bool ok = conn->runCommand( conf->getName() , cmdObj , res ); + bool ok = conn->runCommand( db , cmdObj , res ); result.appendElements( res ); conn.done(); return ok; } }; + + class RunOnAllShardsCommand : public Command { + public: + RunOnAllShardsCommand(const char* n, const char* oldname=NULL) : Command(n, false, oldname) {} + + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return false; } + + // all grid commands are designed not to lock + virtual LockType locktype() const { return NONE; } + + + // default impl uses all shards for DB + virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards){ + DBConfigPtr conf = grid.getDBConfig( dbName , false ); + conf->getAllShards(shards); + } + + virtual void aggregateResults(const vector& results, BSONObjBuilder& output) {} + + // don't override + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& output, bool){ + set shards; + getShards(dbName, cmdObj, shards); + + list< shared_ptr > futures; + for ( set::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ){ + futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) ); + } + + vector results; + BSONObjBuilder subobj (output.subobjStart("raw")); + BSONObjBuilder errors; + for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ){ + shared_ptr res = *i; + if ( ! res->join() ){ + errors.appendAs(res->result()["errmsg"], res->getServer()); + } + results.push_back( res->result() ); + subobj.append( res->getServer() , res->result() ); + } + + subobj.done(); + + BSONObj errobj = errors.done(); + if (! 
errobj.isEmpty()){ + errmsg = errobj.toString(false, true); + return false; + } + + aggregateResults(results, output); + return true; + } + + }; + + class AllShardsCollectionCommand : public RunOnAllShardsCommand { + public: + AllShardsCollectionCommand(const char* n, const char* oldname=NULL) : RunOnAllShardsCommand(n, oldname) {} + + virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards){ + string fullns = dbName + '.' + cmdObj.firstElement().valuestrsafe(); + + DBConfigPtr conf = grid.getDBConfig( dbName , false ); + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + shards.insert(conf->getShard(fullns)); + } else { + conf->getChunkManager(fullns)->getAllShards(shards); + } + } + }; + class NotAllowedOnShardedCollectionCmd : public PublicGridCommand { public: @@ -67,12 +146,10 @@ virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) = 0; - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - - string dbName = getDBName( ns ); + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string fullns = getFullNS( dbName , cmdObj ); - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ return passthrough( conf , cmdObj , result ); @@ -84,16 +161,70 @@ // ---- + class DropIndexesCmd : public AllShardsCollectionCommand { + public: + DropIndexesCmd() : AllShardsCollectionCommand("dropIndexes", "deleteIndexes") {} + } dropIndexesCmd; + + class ReIndexCmd : public AllShardsCollectionCommand { + public: + ReIndexCmd() : AllShardsCollectionCommand("reIndex") {} + } reIndexCmd; + + class ValidateCmd : public AllShardsCollectionCommand { + public: + ValidateCmd() : AllShardsCollectionCommand("validate") {} + } validateCmd; + + class RepairDatabaseCmd : public RunOnAllShardsCommand { + public: + RepairDatabaseCmd() : RunOnAllShardsCommand("repairDatabase") {} + } repairDatabaseCmd; + + class DBStatsCmd : public RunOnAllShardsCommand { + public: + DBStatsCmd() : RunOnAllShardsCommand("dbstats") {} + + virtual void aggregateResults(const vector& results, BSONObjBuilder& output) { + long long objects = 0; + long long dataSize = 0; + long long storageSize = 0; + long long numExtents = 0; + long long indexes = 0; + long long indexSize = 0; + long long fileSize = 0; + + for (vector::const_iterator it(results.begin()), end(results.end()); it != end; ++it){ + const BSONObj& b = *it; + objects += b["objects"].numberLong(); + dataSize += b["dataSize"].numberLong(); + storageSize += b["storageSize"].numberLong(); + numExtents += b["numExtents"].numberLong(); + indexes += b["indexes"].numberLong(); + indexSize += b["indexSize"].numberLong(); + fileSize += b["fileSize"].numberLong(); + } + + //result.appendNumber( "collections" , ncollections ); //TODO: need to find a good way to get this + output.appendNumber( "objects" , objects ); + output.append ( "avgObjSize" , double(dataSize) / double(objects) ); + output.appendNumber( "dataSize" , dataSize ); + output.appendNumber( "storageSize" , storageSize); + output.appendNumber( "numExtents" , numExtents ); + output.appendNumber( "indexes" , indexes ); + output.appendNumber( "indexSize" , indexSize ); + output.appendNumber( "fileSize" , fileSize ); + } + } DBStatsCmdObj; + class DropCmd : public PublicGridCommand { public: DropCmd() : PublicGridCommand( "drop" ){} - bool 
run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - - string dbName = getDBName( ns ); + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); log() << "DROP: " << fullns << endl; @@ -101,10 +232,10 @@ return passthrough( conf , cmdObj , result ); } - ChunkManager * cm = conf->getChunkManager( fullns ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 10418 , "how could chunk manager be null!" , cm ); - cm->drop(); + cm->drop( cm ); return 1; } @@ -113,7 +244,7 @@ class DropDBCmd : public PublicGridCommand { public: DropDBCmd() : PublicGridCommand( "dropDatabase" ){} - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ BSONElement e = cmdObj.firstElement(); @@ -122,14 +253,13 @@ return 0; } - string dbName = getDBName( ns ); - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); log() << "DROP DATABASE: " << dbName << endl; - if ( ! conf || ! conf->isShardingEnabled() ){ - log(1) << " passing though drop database for: " << dbName << endl; - return passthrough( conf , cmdObj , result ); + if ( ! conf ){ + result.append( "info" , "database didn't exist" ); + return true; } if ( ! conf->dropDatabase( errmsg ) ) @@ -140,39 +270,168 @@ } } dropDBCmd; + class RenameCollectionCmd : public PublicGridCommand { + public: + RenameCollectionCmd() : PublicGridCommand( "renameCollection" ){} + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string fullnsFrom = cmdObj.firstElement().valuestrsafe(); + string dbNameFrom = nsToDatabase( fullnsFrom.c_str() ); + DBConfigPtr confFrom = grid.getDBConfig( dbNameFrom , false ); + + string fullnsTo = cmdObj["to"].valuestrsafe(); + string dbNameTo = nsToDatabase( fullnsTo.c_str() ); + DBConfigPtr confTo = grid.getDBConfig( dbNameTo , false ); + + uassert(13140, "Don't recognize source or target DB", confFrom && confTo); + uassert(13138, "You can't rename a sharded collection", !confFrom->isSharded(fullnsFrom)); + uassert(13139, "You can't rename to a sharded collection", !confTo->isSharded(fullnsTo)); + + const Shard& shardTo = confTo->getShard(fullnsTo); + const Shard& shardFrom = confFrom->getShard(fullnsFrom); + + uassert(13137, "Source and destination collections must be on same shard", shardFrom == shardTo); + + return adminPassthrough( confFrom , cmdObj , result ); + } + } renameCollectionCmd; + + class CopyDBCmd : public PublicGridCommand { + public: + CopyDBCmd() : PublicGridCommand( "copydb" ){} + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string todb = cmdObj.getStringField("todb"); + uassert(13402, "need a todb argument", !todb.empty()); + + DBConfigPtr confTo = grid.getDBConfig( todb ); + uassert(13398, "cant copy to sharded DB", !confTo->isShardingEnabled()); + + string fromhost = cmdObj.getStringField("fromhost"); + if (!fromhost.empty()){ + return adminPassthrough( confTo , cmdObj , result ); + } else { + string fromdb = cmdObj.getStringField("fromdb"); + uassert(13399, "need a fromdb argument", !fromdb.empty()); + + DBConfigPtr confFrom = grid.getDBConfig( 
fromdb , false ); + uassert(13400, "don't know where source DB is", confFrom); + uassert(13401, "cant copy from sharded DB", !confFrom->isShardingEnabled()); + + BSONObjBuilder b; + BSONForEach(e, cmdObj){ + if (strcmp(e.fieldName(), "fromhost") != 0) + b.append(e); + } + b.append("fromhost", confFrom->getPrimary().getConnString()); + BSONObj fixed = b.obj(); + + return adminPassthrough( confTo , fixed , result ); + } + + } + }copyDBCmd; + class CountCmd : public PublicGridCommand { public: CountCmd() : PublicGridCommand("count") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - - string dbName = getDBName( ns ); + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool l){ string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - BSONObj filter = cmdObj["query"].embeddedObject(); + BSONObj filter; + if ( cmdObj["query"].isABSONObj() ) + filter = cmdObj["query"].Obj(); - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ - ScopedDbConnection conn( conf->getPrimary() ); - result.append( "n" , (double)conn->count( fullns , filter ) ); + ShardConnection conn( conf->getPrimary() , fullns ); + + BSONObj temp; + bool ok = conn->runCommand( dbName , cmdObj , temp ); conn.done(); - return true; + + if ( ok ){ + result.append( temp["n"] ); + return true; + } + + if ( temp["code"].numberInt() != StaleConfigInContextCode ){ + errmsg = temp["errmsg"].String(); + result.appendElements( temp ); + return false; + } + + // this collection got sharded + ChunkManagerPtr cm = conf->getChunkManager( fullns , true ); + if ( ! cm ){ + errmsg = "should be sharded now"; + result.append( "root" , temp ); + return false; + } } - ChunkManager * cm = conf->getChunkManager( fullns ); - massert( 10419 , "how could chunk manager be null!" , cm ); - - vector chunks; - cm->getChunksForQuery( chunks , filter ); + long long total = 0; + map shardCounts; - unsigned long long total = 0; - for ( vector::iterator i = chunks.begin() ; i != chunks.end() ; i++ ){ - Chunk * c = *i; - total += c->countObjects( filter ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); + while ( true ){ + if ( ! cm ){ + // probably unsharded now + return run( dbName , cmdObj , errmsg , result , l ); + } + + set shards; + cm->getShardsForQuery( shards , filter ); + assert( shards.size() ); + + bool hadToBreak = false; + + for (set::iterator it=shards.begin(), end=shards.end(); it != end; ++it){ + ShardConnection conn(*it, fullns); + if ( conn.setVersion() ){ + total = 0; + shardCounts.clear(); + cm = conf->getChunkManager( fullns ); + conn.done(); + hadToBreak = true; + break; + } + + BSONObj temp; + bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp ); + conn.done(); + + if ( ok ){ + long long mine = temp["n"].numberLong(); + total += mine; + shardCounts[it->getName()] = mine; + continue; + } + + if ( StaleConfigInContextCode == temp["code"].numberInt() ){ + // my version is old + total = 0; + shardCounts.clear(); + cm = conf->getChunkManager( fullns , true ); + hadToBreak = true; + break; + } + + // command failed :( + errmsg = "failed on : " + it->getName(); + result.append( "cause" , temp ); + return false; + } + if ( ! 
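[editor's note] The rewritten CountCmd above no longer counts chunk-by-chunk: it asks getShardsForQuery for the owning shards, runs count on each, and whenever a shard answers with StaleConfigInContextCode it discards the partial totals, refreshes the ChunkManager and starts over. A standalone sketch of that reset-and-retry shape; shard replies are simulated, where the real loop is driven by conn->runCommand and conf->getChunkManager:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Reply { bool ok; bool stale; long long n; };

// One simulated per-shard count; `epoch` models the routing-table generation.
Reply countOnShard( const std::string& shard, int epoch ){
    bool stale = ( epoch == 0 && shard == "shardB" );   // shardB rejects the old version once
    return Reply{ !stale, stale, 42 };
}

int main(){
    std::vector<std::string> shards = { "shardA", "shardB" };
    int epoch = 0;                                 // stand-in for the ChunkManager generation
    long long total = 0;
    std::map<std::string, long long> shardCounts;

    for ( bool retry = true; retry; ){
        retry = false;
        total = 0;                                 // partial results are worthless once stale
        shardCounts.clear();
        for ( const std::string& s : shards ){
            Reply r = countOnShard( s, epoch );
            if ( r.stale ){                        // ~ StaleConfigInContextCode in the diff
                ++epoch;                           // ~ conf->getChunkManager( fullns , true )
                retry = true;                      // ~ hadToBreak
                break;
            }
            total += r.n;
            shardCounts[s] = r.n;
        }
    }
    std::cout << "n: " << total << " from " << shardCounts.size() << " shards\n";
}
[end note]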
hadToBreak ) + break; } - result.append( "n" , (double)total ); + total = applySkipLimit( total , cmdObj ); + result.appendNumber( "n" , total ); + BSONObjBuilder temp( result.subobjStart( "shards" ) ); + for ( map::iterator i=shardCounts.begin(); i!=shardCounts.end(); ++i ) + temp.appendNumber( i->first , i->second ); + temp.done(); return true; } } countCmd; @@ -180,31 +439,33 @@ class CollectionStats : public PublicGridCommand { public: CollectionStats() : PublicGridCommand("collstats") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbName = getDBName( ns ); + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + result.append( "ns" , fullns ); result.appendBool("sharded", false); + result.append( "primary" , conf->getPrimary().getName() ); return passthrough( conf , cmdObj , result); } result.appendBool("sharded", true); - ChunkManager * cm = conf->getChunkManager( fullns ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 12594 , "how could chunk manager be null!" , cm ); - set servers; - cm->getAllServers(servers); + set servers; + cm->getAllShards(servers); BSONObjBuilder shardStats; long long count=0; long long size=0; long long storageSize=0; int nindexes=0; - for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ){ + bool warnedAboutIndexes = false; + for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ){ ScopedDbConnection conn( *i ); BSONObj res; if ( ! conn->runCommand( dbName , cmdObj , res ) ){ @@ -217,17 +478,33 @@ size += res["size"].numberLong(); storageSize += res["storageSize"].numberLong(); - if (nindexes) - massert(12595, "nindexes should be the same on all shards!", nindexes == res["nindexes"].numberInt()); - else - nindexes = res["nindexes"].numberInt(); + int myIndexes = res["nindexes"].numberInt(); - shardStats.append(*i, res); + if ( nindexes == 0 ){ + nindexes = myIndexes; + } + else if ( nindexes == myIndexes ){ + // no-op + } + else { + // hopefully this means we're building an index + + if ( myIndexes > nindexes ) + nindexes = myIndexes; + + if ( ! warnedAboutIndexes ){ + result.append( "warning" , "indexes don't all match - ok if ensureIndex is running" ); + warnedAboutIndexes = true; + } + } + + shardStats.append(i->getName(), res); } result.append("ns", fullns); result.appendNumber("count", count); result.appendNumber("size", size); + result.append ("avgObjSize", double(size) / double(count)); result.appendNumber("storageSize", storageSize); result.append("nindexes", nindexes); @@ -241,95 +518,92 @@ class FindAndModifyCmd : public PublicGridCommand { public: FindAndModifyCmd() : PublicGridCommand("findandmodify") { } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string dbName = getDBName( ns ); + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - BSONObj filter = cmdObj.getObjectField("query"); - - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! 
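[editor's note] applySkipLimit above trims the grand total only after every shard has reported its full count, since skip and limit cannot be applied per shard. The helper's body is not part of this diff; the following sketch is the arithmetic assumed from the call site (skip consumed first, then limit caps the remainder):

#include <algorithm>

// Assumed semantics only; the real helper reads skip/limit out of cmdObj.
long long applySkipLimitSketch( long long total, long long skip, long long limit ){
    if ( skip > 0 )
        total = std::max( 0LL, total - skip );
    if ( limit > 0 )
        total = std::min( total, limit );
    return total;
}

int main(){
    // shards reported 10+20+30 documents; { skip: 15, limit: 20 } leaves 20
    return applySkipLimitSketch( 60, 15, 20 ) == 20 ? 0 : 1;
}
[end note]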
conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ return passthrough( conf , cmdObj , result); } - ChunkManager * cm = conf->getChunkManager( fullns ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 13002 , "how could chunk manager be null!" , cm ); - vector chunks; - cm->getChunksForQuery( chunks , filter ); + BSONObj filter = cmdObj.getObjectField("query"); + uassert(13343, "query for sharded findAndModify must have shardkey", cm->hasShardKey(filter)); - BSONObj sort = cmdObj.getObjectField("sort"); - if (!sort.isEmpty()){ - ShardKeyPattern& sk = cm->getShardKey(); - { - BSONObjIterator k (sk.key()); - BSONObjIterator s (sort); - bool good = true; - while (k.more()){ - if (!s.more()){ - good = false; - break; - } - - BSONElement ke = k.next(); - BSONElement se = s.next(); - - // TODO consider values when we support compound keys - if (strcmp(ke.fieldName(), se.fieldName()) != 0){ - good = false; - break; - } - } + //TODO with upsert consider tracking for splits - uassert(13001, "Sort must match shard key for sharded findandmodify", good); - } + ChunkPtr chunk = cm->findChunk(filter); + ShardConnection conn( chunk->getShard() , fullns ); + BSONObj res; + bool ok = conn->runCommand( conf->getName() , cmdObj , res ); + conn.done(); - std::sort(chunks.begin(), chunks.end(), ChunkCmp(sort)); + if (ok || (strcmp(res["errmsg"].valuestrsafe(), "No matching object found") != 0)){ + result.appendElements(res); + return ok; } + + return true; + } - for ( vector::iterator i = chunks.begin() ; i != chunks.end() ; i++ ){ - Chunk * c = *i; + } findAndModifyCmd; - ScopedDbConnection conn( c->getShard() ); + class DataSizeCmd : public PublicGridCommand { + public: + DataSizeCmd() : PublicGridCommand("dataSize", "datasize") { } + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string fullns = cmdObj.firstElement().String(); + + DBConfigPtr conf = grid.getDBConfig( dbName , false ); + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + return passthrough( conf , cmdObj , result); + } + + ChunkManagerPtr cm = conf->getChunkManager( fullns ); + massert( 13407 , "how could chunk manager be null!" , cm ); + + BSONObj min = cmdObj.getObjectField( "min" ); + BSONObj max = cmdObj.getObjectField( "max" ); + BSONObj keyPattern = cmdObj.getObjectField( "keyPattern" ); + + uassert(13408, "keyPattern must equal shard key", cm->getShardKey().key() == keyPattern); + + // yes these are doubles... + double size = 0; + double numObjects = 0; + int millis = 0; + + set shards; + cm->getShardsForRange(shards, min, max); + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ){ + ScopedDbConnection conn( *i ); BSONObj res; - bool ok = conn->runCommand( conf->getName() , fixCmdObj(cmdObj, c) , res ); + bool ok = conn->runCommand( conf->getName() , cmdObj , res ); conn.done(); - - if (ok || (strcmp(res["errmsg"].valuestrsafe(), "No matching object found") != 0)){ - result.appendElements(res); - return ok; + + if ( ! 
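[editor's note] FindAndModifyCmd above shows the single-shard dispatch pattern: the query must contain the shard key (uassert 13343), which lets cm->findChunk route the whole command to exactly one shard, replacing the removed sort-and-scan over all chunks. A toy router illustrating findChunk over half-open key ranges; the real ChunkManager keys on BSON shard-key values, not ints:

#include <iostream>
#include <limits>
#include <map>
#include <string>

int main(){
    // Each entry is a chunk's inclusive lower bound; the chunk covers
    // [min, next entry's min). MinKey..MaxKey becomes the int range here.
    std::map<int, std::string> chunks = {
        { std::numeric_limits<int>::min(), "shardA" },   // (-inf, 100)
        { 100, "shardB" },                               // [100, 500)
        { 500, "shardA" },                               // [500, +inf)
    };

    int shardKey = 250;                        // extracted from the findAndModify query
    auto it = chunks.upper_bound( shardKey );  // first chunk starting past the key...
    --it;                                      // ...so the previous one owns it
    std::cout << "route findAndModify to " << it->second << "\n";   // shardB
}
[end note]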
ok ){ + result.appendElements( res ); + return false; } - } - - return true; - } - private: - BSONObj fixCmdObj(const BSONObj& cmdObj, const Chunk* chunk){ - assert(chunk); + size += res["size"].number(); + numObjects += res["numObjects"].number(); + millis += res["millis"].numberInt(); - BSONObjBuilder b; - BSONObjIterator i(cmdObj); - bool foundQuery = false; - while (i.more()){ - BSONElement e = i.next(); - if (strcmp(e.fieldName(), "query") != 0){ - b.append(e); - }else{ - foundQuery = true; - b.append("query", ClusteredCursor::concatQuery(e.embeddedObjectUserCheck(), chunk->getFilter())); - } } - if (!foundQuery) - b.append("query", chunk->getFilter()); - - return b.obj(); + result.append( "size", size ); + result.append( "numObjects" , numObjects ); + result.append( "millis" , millis ); + return true; } - } findAndModifyCmd; + } DataSizeCmd; class ConvertToCappedCmd : public NotAllowedOnShardedCollectionCmd { public: @@ -356,33 +630,30 @@ public: DistinctCmd() : PublicGridCommand("distinct"){} virtual void help( stringstream &help ) const { - help << "{ distinct : 'collection name' , key : 'a.b' }"; + help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - - string dbName = getDBName( ns ); + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ return passthrough( conf , cmdObj , result ); } - ChunkManager * cm = conf->getChunkManager( fullns ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 10420 , "how could chunk manager be null!" , cm ); - - vector chunks; - cm->getChunksForQuery( chunks , BSONObj() ); + + BSONObj query = getQuery(cmdObj); + set shards; + cm->getShardsForQuery(shards, query); set all; int size = 32; - for ( vector::iterator i = chunks.begin() ; i != chunks.end() ; i++ ){ - Chunk * c = *i; - - ScopedDbConnection conn( c->getShard() ); + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ){ + ShardConnection conn( *i , fullns ); BSONObj res; bool ok = conn->runCommand( conf->getName() , cmdObj , res ); conn.done(); @@ -392,11 +663,11 @@ return false; } - BSONObjIterator it( res["values"].embeddedObjectUserCheck() ); + BSONObjIterator it( res["values"].embeddedObject() ); while ( it.more() ){ BSONElement nxt = it.next(); BSONObjBuilder temp(32); - temp.appendAs( nxt , "x" ); + temp.appendAs( nxt , "" ); all.insert( temp.obj() ); } @@ -413,6 +684,45 @@ } } disinctCmd; + class FileMD5Cmd : public PublicGridCommand { + public: + FileMD5Cmd() : PublicGridCommand("filemd5"){} + virtual void help( stringstream &help ) const { + help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; + } + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string fullns = dbName; + fullns += "."; + { + string root = cmdObj.getStringField( "root" ); + if ( root.size() == 0 ) + root = "fs"; + fullns += root; + } + fullns += ".chunks"; + + DBConfigPtr conf = grid.getDBConfig( dbName , false ); + + if ( ! conf || ! conf->isShardingEnabled() || ! 
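[editor's note] DistinctCmd above merges the per-shard "values" arrays through a set: each value is rewrapped via appendAs( nxt , "" ) into a one-element object with an empty field name, so set ordering and deduplication look only at the values themselves. The same merge with plain strings standing in for the BSONObj wrappers:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main(){
    // Each inner vector is one shard's reply to { distinct: ..., key: ... }.
    std::vector<std::vector<std::string>> perShard = {
        { "blue", "green" },      // shard 1
        { "green", "red" },       // shard 2
    };

    std::set<std::string> all;    // dedupes and orders across shards,
                                  // like the set of BSONObj in the diff
    for ( const auto& values : perShard )
        all.insert( values.begin(), values.end() );

    for ( const std::string& v : all )
        std::cout << v << ' ';    // blue green red
    std::cout << '\n';
}
[end note]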
conf->isSharded( fullns ) ){ + return passthrough( conf , cmdObj , result ); + } + + ChunkManagerPtr cm = conf->getChunkManager( fullns ); + massert( 13091 , "how could chunk manager be null!" , cm ); + uassert( 13092 , "GridFS chunks collection can only be sharded on files_id", cm->getShardKey().key() == BSON("files_id" << 1)); + + ChunkPtr chunk = cm->findChunk( BSON("files_id" << cmdObj.firstElement()) ); + + ShardConnection conn( chunk->getShard() , fullns ); + BSONObj res; + bool ok = conn->runCommand( conf->getName() , cmdObj , res ); + conn.done(); + + result.appendElements(res); + return ok; + } + } fileMD5Cmd; + class MRCmd : public PublicGridCommand { public: MRCmd() : PublicGridCommand( "mapreduce" ){} @@ -435,6 +745,7 @@ fn == "reduce" || fn == "query" || fn == "sort" || + fn == "scope" || fn == "verbose" ){ b.append( e ); } @@ -451,14 +762,13 @@ return b.obj(); } - bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ Timer t; - string dbName = getDBName( ns ); string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - DBConfig * conf = grid.getDBConfig( dbName , false ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ return passthrough( conf , cmdObj , result ); @@ -466,15 +776,15 @@ BSONObjBuilder timingBuilder; - ChunkManager * cm = conf->getChunkManager( fullns ); + ChunkManagerPtr cm = conf->getChunkManager( fullns ); BSONObj q; if ( cmdObj["query"].type() == Object ){ q = cmdObj["query"].embeddedObjectUserCheck(); } - vector chunks; - cm->getChunksForQuery( chunks , q ); + set shards; + cm->getShardsForQuery( shards , q ); const string shardedOutputCollection = getTmpName( collection ); @@ -486,9 +796,8 @@ list< shared_ptr > futures; - for ( vector::iterator i = chunks.begin() ; i != chunks.end() ; i++ ){ - Chunk * c = *i; - futures.push_back( Future::spawnCommand( c->getShard() , dbName , shardedCommand ) ); + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ){ + futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand ) ); } BSONObjBuilder shardresults; @@ -506,9 +815,12 @@ timingBuilder.append( "shards" , t.millis() ); Timer t2; - ScopedDbConnection conn( conf->getPrimary() ); + ShardConnection conn( conf->getPrimary() , fullns ); BSONObj finalResult; - if ( ! conn->runCommand( dbName , finalCmd.obj() , finalResult ) ){ + bool ok = conn->runCommand( dbName , finalCmd.obj() , finalResult ); + conn.done(); + + if ( ! ok ){ errmsg = "final reduce failed: "; errmsg += finalResult.toString(); return 0; @@ -522,5 +834,18 @@ return 1; } } mrCmd; + + class ApplyOpsCmd : public PublicGridCommand { + public: + ApplyOpsCmd() : PublicGridCommand( "applyOps" ){} + + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + errmsg = "applyOps not allowed through mongos"; + return false; + } + + } applyOpsCmd; + } + } diff -Nru mongodb-1.4.4/s/config.cpp mongodb-1.6.3/s/config.cpp --- mongodb-1.4.4/s/config.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/config.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,8 +16,9 @@ * along with this program. If not, see . 
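[editor's note] FileMD5Cmd above works because GridFS chunk documents must be sharded on { files_id: 1 } (uassert 13092): every chunk of one file then falls inside a single shard's range, so the checksum can be computed entirely by the shard that findChunk returns. The namespace it targets is assembled from the optional root prefix; a standalone version of that small helper:

#include <iostream>
#include <string>

// db + "." + root + ".chunks", with root defaulting to "fs" as in the diff.
std::string gridfsChunksNS( const std::string& db, std::string root ){
    if ( root.empty() )
        root = "fs";
    return db + "." + root + ".chunks";
}

int main(){
    std::cout << gridfsChunksNS( "test", "" )        << "\n"    // test.fs.chunks
              << gridfsChunksNS( "test", "archive" ) << "\n";   // test.archive.chunks
}
[end note]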
*/ -#include "stdafx.h" +#include "pch.h" #include "../util/message.h" +#include "../util/stringutils.h" #include "../util/unittest.h" #include "../client/connpool.h" #include "../client/model.h" @@ -27,146 +28,251 @@ #include "server.h" #include "config.h" #include "chunk.h" +#include "grid.h" namespace mongo { - int ConfigServer::VERSION = 2; + int ConfigServer::VERSION = 3; + Shard Shard::EMPTY; + + string ShardNS::shard = "config.shards"; + string ShardNS::database = "config.databases"; + string ShardNS::collection = "config.collections"; + string ShardNS::chunk = "config.chunks"; + + string ShardNS::mongos = "config.mongos"; + string ShardNS::settings = "config.settings"; + + BSONField ShardFields::draining("draining"); + BSONField ShardFields::maxSize ("maxSize"); + BSONField ShardFields::currSize("currSize"); + + OID serverID; /* --- DBConfig --- */ - string DBConfig::modelServer() { - return configServer.modelServer(); + DBConfig::CollectionInfo::CollectionInfo( DBConfig * db , const BSONObj& in ){ + _dirty = false; + _dropped = in["dropped"].trueValue(); + if ( in["key"].isABSONObj() ) + shard( db , in["_id"].String() , in["key"].Obj() , in["unique"].trueValue() ); + } + + + void DBConfig::CollectionInfo::shard( DBConfig * db , const string& ns , const ShardKeyPattern& key , bool unique ){ + _cm.reset( new ChunkManager( db, ns , key , unique ) ); + _dirty = true; + } + + void DBConfig::CollectionInfo::unshard(){ + _cm.reset(); + _dropped = true; + _dirty = true; } + void DBConfig::CollectionInfo::save( const string& ns , DBClientBase* conn ){ + BSONObj key = BSON( "_id" << ns ); + + BSONObjBuilder val; + val.append( "_id" , ns ); + val.appendDate( "lastmod" , time(0) ); + val.appendBool( "dropped" , _dropped ); + if ( _cm ) + _cm->getInfo( val ); + + conn->update( ShardNS::collection , key , val.obj() , true ); + _dirty = false; + } + + bool DBConfig::isSharded( const string& ns ){ if ( ! _shardingEnabled ) return false; - return _sharded.find( ns ) != _sharded.end(); + scoped_lock lk( _lock ); + return _isSharded( ns ); } - string DBConfig::getShard( const string& ns ){ + bool DBConfig::_isSharded( const string& ns ){ + if ( ! _shardingEnabled ) + return false; + Collections::iterator i = _collections.find( ns ); + if ( i == _collections.end() ) + return false; + return i->second.isSharded(); + } + + + const Shard& DBConfig::getShard( const string& ns ){ if ( isSharded( ns ) ) - return ""; + return Shard::EMPTY; - uassert( 10178 , "no primary!" , _primary.size() ); + uassert( 10178 , "no primary!" , _primary.ok() ); return _primary; } void DBConfig::enableSharding(){ + if ( _shardingEnabled ) + return; + scoped_lock lk( _lock ); _shardingEnabled = true; + _save(); } - ChunkManager* DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ){ - if ( ! _shardingEnabled ) - throw UserException( 8042 , "db doesn't have sharding enabled" ); + ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ){ + uassert( 8042 , "db doesn't have sharding enabled" , _shardingEnabled ); - ChunkManager * info = _shards[ns]; - if ( info ) - return info; - - if ( isSharded( ns ) ) - throw UserException( 8043 , "already sharded" ); + scoped_lock lk( _lock ); + + CollectionInfo& ci = _collections[ns]; + uassert( 8043 , "already sharded" , ! 
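[editor's note] CollectionInfo above is a small write-back cache: shard() and unshard() only mutate in-memory state and flip _dirty, and the actual upsert into config.collections happens later when DBConfig::_save (further down) walks the map and persists just the dirty entries. The pattern in isolation, with persist() standing in for the config-server upsert:

#include <iostream>
#include <map>
#include <string>

struct CollectionInfo {
    std::string key;        // serialized shard key, "" = unsharded
    bool dropped = false;
    bool dirty   = false;   // set by mutators, cleared by save
};

// Stand-in for the upsert into config.collections.
void persist( const std::string& ns, CollectionInfo& ci ){
    std::cout << "upsert " << ns << " key=" << ci.key << " dropped=" << ci.dropped << "\n";
    ci.dirty = false;
}

int main(){
    std::map<std::string, CollectionInfo> collections;
    collections["test.foo"] = { "{a:1}", false, true  };   // shard()   -> dirty
    collections["test.bar"] = { "",      true,  true  };   // unshard() -> dirty
    collections["test.baz"] = { "{b:1}", false, false };   // untouched

    for ( auto& p : collections )    // ~ _save(): only dirty entries hit the config DB
        if ( p.second.dirty )
            persist( p.first, p.second );
}
[end note]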
ci.isSharded() ); log() << "enable sharding on: " << ns << " with shard key: " << fieldsAndOrder << endl; - _sharded[ns] = CollectionInfo( fieldsAndOrder , unique ); - info = new ChunkManager( this , ns , fieldsAndOrder , unique ); - _shards[ns] = info; - return info; + ci.shard( this , ns , fieldsAndOrder , unique ); + ci.getCM()->maybeChunkCollection(); + _save(); + return ci.getCM(); } bool DBConfig::removeSharding( const string& ns ){ if ( ! _shardingEnabled ){ - cout << "AAAA" << endl; return false; } - ChunkManager * info = _shards[ns]; - map::iterator i = _sharded.find( ns ); + scoped_lock lk( _lock ); + + Collections::iterator i = _collections.find( ns ); - if ( info == 0 && i == _sharded.end() ){ - cout << "BBBB" << endl; + if ( i == _collections.end() ) return false; - } - uassert( 10179 , "_sharded but no info" , info ); - uassert( 10180 , "info but no sharded" , i != _sharded.end() ); - _sharded.erase( i ); - _shards.erase( ns ); // TODO: clean this up, maybe switch to shared_ptr + CollectionInfo& ci = _collections[ns]; + if ( ! ci.isSharded() ) + return false; + + ci.unshard(); + _save(); return true; } + + ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool shouldReload ){ + scoped_lock lk( _lock ); + + if ( shouldReload ) + _reload(); - ChunkManager* DBConfig::getChunkManager( const string& ns , bool reload ){ - ChunkManager* m = _shards[ns]; - if ( m && ! reload ) - return m; - - uassert( 10181 , (string)"not sharded:" + ns , isSharded( ns ) ); - if ( m && reload ) - log() << "reloading shard info for: " << ns << endl; - m = new ChunkManager( this , ns , _sharded[ ns ].key , _sharded[ns].unique ); - _shards[ns] = m; - return m; + CollectionInfo& ci = _collections[ns]; + massert( 10181 , (string)"not sharded:" + ns , ci.isSharded() || ci.wasDropped() ); + return ci.getCM(); } + void DBConfig::setPrimary( string s ){ + scoped_lock lk( _lock ); + _primary.reset( s ); + _save(); + } + void DBConfig::serialize(BSONObjBuilder& to){ - to.append("name", _name); + to.append("_id", _name); to.appendBool("partitioned", _shardingEnabled ); - to.append("primary", _primary ); - - if ( _sharded.size() > 0 ){ - BSONObjBuilder a; - for ( map::reverse_iterator i=_sharded.rbegin(); i != _sharded.rend(); i++){ - BSONObjBuilder temp; - temp.append( "key" , i->second.key.key() ); - temp.appendBool( "unique" , i->second.unique ); - a.append( i->first.c_str() , temp.obj() ); - } - to.append( "sharded" , a.obj() ); - } + to.append("primary", _primary.getName() ); } - void DBConfig::unserialize(const BSONObj& from){ - _name = from.getStringField("name"); + bool DBConfig::unserialize(const BSONObj& from){ log(1) << "DBConfig unserialize: " << _name << " " << from << endl; + assert( _name == from["_id"].String() ); _shardingEnabled = from.getBoolField("partitioned"); - _primary = from.getStringField("primary"); - - _sharded.clear(); + _primary.reset( from.getStringField("primary") ); + + // this is a temporary migration thing BSONObj sharded = from.getObjectField( "sharded" ); - if ( ! 
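[editor's note] The unserialize migration branch that begins just above returns true only when it found the legacy in-document "sharded" sub-object; _load (below) responds by immediately re-saving the database record in the new split layout, config.databases plus config.collections. That read-old, write-new upgrade step in miniature; the record layout here is illustrative:

#include <iostream>
#include <map>
#include <string>

struct DbDoc {                    // toy stand-in for the config.databases document
    std::string id;
    bool partitioned = false;
    std::map<std::string, std::string> legacySharded;   // old in-document collection map
};

// Returns true when legacy state was consumed, i.e. the caller must re-save.
bool unserializeSketch( DbDoc& doc, std::map<std::string, std::string>& collections ){
    if ( doc.legacySharded.empty() )
        return false;
    collections = doc.legacySharded;   // hoist into the new per-collection store
    doc.legacySharded.clear();
    return true;
}

int main(){
    DbDoc doc{ "alleyinsider", true, { { "alleyinsider.posts", "{ts:1}" } } };
    std::map<std::string, std::string> collections;
    if ( unserializeSketch( doc, collections ) )
        std::cout << "re-saving " << doc.id << " in the new format\n";   // ~ _load() -> _save()
}
[end note]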
sharded.isEmpty() ){ - BSONObjIterator i(sharded); - while ( i.more() ){ - BSONElement e = i.next(); - uassert( 10182 , "sharded things have to be objects" , e.type() == Object ); - BSONObj c = e.embeddedObject(); - uassert( 10183 , "key has to be an object" , c["key"].type() == Object ); - _sharded[e.fieldName()] = CollectionInfo( c["key"].embeddedObject() , - c["unique"].trueValue() ); - } + if ( sharded.isEmpty() ) + return false; + + BSONObjIterator i(sharded); + while ( i.more() ){ + BSONElement e = i.next(); + uassert( 10182 , "sharded things have to be objects" , e.type() == Object ); + + BSONObj c = e.embeddedObject(); + uassert( 10183 , "key has to be an object" , c["key"].type() == Object ); + + _collections[e.fieldName()].shard( this , e.fieldName() , c["key"].Obj() , c["unique"].trueValue() ); } + return true; } - - void DBConfig::save( bool check ){ - Model::save( check ); - for ( map::iterator i=_shards.begin(); i != _shards.end(); i++) - i->second->save(); + + bool DBConfig::load(){ + scoped_lock lk( _lock ); + return _load(); } + bool DBConfig::_load(){ + ScopedDbConnection conn( configServer.modelServer() ); + + BSONObj o = conn->findOne( ShardNS::database , BSON( "_id" << _name ) ); + + + if ( o.isEmpty() ){ + conn.done(); + return false; + } + + if ( unserialize( o ) ) + _save(); + + BSONObjBuilder b; + b.appendRegex( "_id" , (string)"^" + _name + "." ); + + + auto_ptr cursor = conn->query( ShardNS::collection ,b.obj() ); + assert( cursor.get() ); + while ( cursor->more() ){ + BSONObj o = cursor->next(); + _collections[o["_id"].String()] = CollectionInfo( this , o ); + } + + conn.done(); + + return true; + } + + void DBConfig::_save(){ + ScopedDbConnection conn( configServer.modelServer() ); + + BSONObj n; + { + BSONObjBuilder b; + serialize(b); + n = b.obj(); + } + + conn->update( ShardNS::database , BSON( "_id" << _name ) , n , true ); + string err = conn->getLastError(); + uassert( 13396 , (string)"DBConfig save failed: " + err , err.size() == 0 ); + + for ( Collections::iterator i=_collections.begin(); i!=_collections.end(); ++i ){ + if ( ! i->second.isDirty() ) + continue; + i->second.save( i->first , conn.get() ); + } + + conn.done(); + } + + bool DBConfig::reload(){ - // TODO: i don't think is 100% correct - return doload(); + scoped_lock lk( _lock ); + return _reload(); } - bool DBConfig::doload(){ - BSONObjBuilder b; - b.append("name", _name.c_str()); - BSONObj q = b.done(); - return load(q); + bool DBConfig::_reload(){ + // TODO: i don't think is 100% correct + return _load(); } - + bool DBConfig::dropDatabase( string& errmsg ){ /** * 1) make sure everything is up @@ -177,6 +283,7 @@ */ log() << "DBConfig::dropDatabase: " << _name << endl; + configServer.logChange( "dropDatabase.start" , _name , BSONObj() ); // 1 if ( ! configServer.allUp( errmsg ) ){ @@ -186,14 +293,19 @@ // 2 grid.removeDB( _name ); - remove( true ); + { + ScopedDbConnection conn( configServer.modelServer() ); + conn->remove( ShardNS::database , BSON( "_id" << _name ) ); + conn.done(); + } + if ( ! configServer.allUp( errmsg ) ){ log() << "error removing from config server even after checking!" 
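[editor's note] _load above restores a database in two queries: a findOne on config.databases by _id, then a query on config.collections whose _id must match the anchored regex "^" + _name + "." built by appendRegex. The trailing dot is an unescaped regex metacharacter, so strictly it matches any single character; that is harmless here because every stored _id really is "<dbname>.<collection>". The same prefix selection over an in-memory ordered map:

#include <iostream>
#include <map>
#include <string>

int main(){
    std::map<std::string, std::string> configCollections = {
        { "test.foo",  "{a:1}" },
        { "test.bar",  "{b:1}" },
        { "test2.baz", "{c:1}" },
    };

    const std::string prefix = "test.";   // ~ appendRegex( "_id", "^" + _name + "." )
    // The map is ordered, so all matching _ids form one contiguous range.
    for ( auto it = configCollections.lower_bound( prefix );
          it != configCollections.end()
              && it->first.compare( 0, prefix.size(), prefix ) == 0;
          ++it )
        std::cout << "load " << it->first << " key " << it->second << "\n";
}
[end note]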
<< endl; return 0; } log(1) << "\t removed entry from config server for: " << _name << endl; - set allServers; + set allServers; // 3 while ( true ){ @@ -217,9 +329,8 @@ } // 5 - for ( set::iterator i=allServers.begin(); i!=allServers.end(); i++ ){ - string s = *i; - ScopedDbConnection conn( s ); + for ( set::iterator i=allServers.begin(); i!=allServers.end(); i++ ){ + ScopedDbConnection conn( *i ); BSONObj res; if ( ! conn->dropDatabase( _name , &res ) ){ errmsg = res.toString(); @@ -230,16 +341,21 @@ log(1) << "\t dropped primary db for: " << _name << endl; + configServer.logChange( "dropDatabase" , _name , BSONObj() ); return true; } - bool DBConfig::_dropShardedCollections( int& num, set& allServers , string& errmsg ){ + bool DBConfig::_dropShardedCollections( int& num, set& allServers , string& errmsg ){ num = 0; set seen; while ( true ){ - map::iterator i = _shards.begin(); - - if ( i == _shards.end() ) + Collections::iterator i = _collections.begin(); + for ( ; i != _collections.end(); ++i ){ + if ( i->second.isSharded() ) + break; + } + + if ( i == _collections.end() ) break; if ( seen.count( i->first ) ){ @@ -250,117 +366,41 @@ seen.insert( i->first ); log(1) << "\t dropping sharded collection: " << i->first << endl; - i->second->getAllServers( allServers ); - i->second->drop(); + i->second.getCM()->getAllShards( allServers ); + i->second.getCM()->drop( i->second.getCM() ); num++; uassert( 10184 , "_dropShardedCollections too many collections - bailing" , num < 100000 ); log(2) << "\t\t dropped " << num << " so far" << endl; } + return true; } - /* --- Grid --- */ - - string Grid::pickShardForNewDB(){ - ScopedDbConnection conn( configServer.getPrimary() ); - - // TODO: this is temporary - - vector all; - auto_ptr c = conn->query( "config.shards" , Query() ); - while ( c->more() ){ - BSONObj s = c->next(); - all.push_back( s["host"].valuestrsafe() ); - // look at s["maxSize"] if exists + void DBConfig::getAllShards(set& shards) const{ + shards.insert(getPrimary()); + for (Collections::const_iterator it(_collections.begin()), end(_collections.end()); it != end; ++it){ + if (it->second.isSharded()){ + it->second.getCM()->getAllShards(shards); + } // TODO: handle collections on non-primary shard } - conn.done(); - - if ( all.size() == 0 ) - return ""; - - return all[ rand() % all.size() ]; - } - - bool Grid::knowAboutShard( string name ) const{ - ScopedDbConnection conn( configServer.getPrimary() ); - BSONObj shard = conn->findOne( "config.shards" , BSON( "host" << name ) ); - conn.done(); - return ! shard.isEmpty(); - } - - DBConfig* Grid::getDBConfig( string database , bool create ){ - { - string::size_type i = database.find( "." ); - if ( i != string::npos ) - database = database.substr( 0 , i ); - } - - if ( database == "config" ) - return &configServer; - - scoped_lock l( _lock ); - - DBConfig*& cc = _databases[database]; - if ( cc == 0 ){ - cc = new DBConfig( database ); - if ( ! cc->doload() ){ - if ( create ){ - // note here that cc->primary == 0. 
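[editor's note] _dropShardedCollections above has a deliberately paranoid shape: it restarts the scan after every drop (the collection map mutates as entries unshard themselves), keeps a `seen` set so a collection that refuses to disappear fails fast instead of being retried forever, and additionally bails out via uassert 10184 after 100000 iterations. The control flow in isolation; the diff's exact errmsg is elided here:

#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>

int main(){
    std::map<std::string, bool> collections = {   // ns -> isSharded
        { "db.a", true }, { "db.b", false }, { "db.c", true },
    };
    std::set<std::string> seen;

    while ( true ){
        auto i = collections.begin();             // restart the scan each pass...
        while ( i != collections.end() && !i->second )
            ++i;                                  // ...looking for the first sharded entry
        if ( i == collections.end() )
            break;                                // nothing sharded left: done
        if ( seen.count( i->first ) )             // same ns twice: the drop did not stick
            throw std::runtime_error( "collection did not drop" );
        seen.insert( i->first );
        std::cout << "dropping " << i->first << "\n";
        i->second = false;                        // ~ getCM()->drop(): entry becomes unsharded
    }
}
[end note]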
- log() << "couldn't find database [" << database << "] in config db" << endl; - - if ( database == "admin" ) - cc->_primary = configServer.getPrimary(); - else - cc->_primary = pickShardForNewDB(); - - if ( cc->_primary.size() ){ - cc->save(); - log() << "\t put [" << database << "] on: " << cc->_primary << endl; - } - else { - log() << "\t can't find a shard to put new db on" << endl; - uassert( 10185 , "can't find a shard to put new db on" , 0 ); - } - } - else { - cc = 0; - } - } - - } - - return cc; - } - - void Grid::removeDB( string database ){ - uassert( 10186 , "removeDB expects db name" , database.find( '.' ) == string::npos ); - scoped_lock l( _lock ); - _databases.erase( database ); - - } - - unsigned long long Grid::getNextOpTime() const { - ScopedDbConnection conn( configServer.getPrimary() ); - - BSONObj result; - massert( 10421 , "getoptime failed" , conn->simpleCommand( "admin" , &result , "getoptime" ) ); - conn.done(); - - return result["optime"]._numberLong(); } /* --- ConfigServer ---- */ - ConfigServer::ConfigServer() { + ConfigServer::ConfigServer() : DBConfig( "config" ){ _shardingEnabled = false; - _primary = ""; - _name = "grid"; } ConfigServer::~ConfigServer() { } + bool ConfigServer::init( string s ){ + vector configdbs; + splitStringDelim( s, &configdbs, ',' ); + return init( configdbs ); + } + bool ConfigServer::init( vector configHosts ){ uassert( 10187 , "need configdbs" , configHosts.size() ); @@ -369,18 +409,12 @@ sleepsecs(5); dbexit( EXIT_BADOPTIONS ); } - ourHostname = hn; - stringstream fullString; - set hosts; for ( size_t i=0; i 0 ) - fullString << ","; - fullString << configHosts[i]; } for ( set::iterator i=hosts.begin(); i!=hosts.end(); i++ ){ @@ -397,9 +431,97 @@ if ( ! ok ) return false; } + + _config = configHosts; + + string fullString; + joinStringDelim( configHosts, &fullString, ',' ); + _primary.setAddress( fullString , true ); + log(1) << " config string : " << fullString << endl; + + return true; + } + + bool ConfigServer::checkConfigServersConsistent( string& errmsg , int tries ) const { + if ( _config.size() == 1 ) + return true; + + if ( tries <= 0 ) + return false; + + unsigned firstGood = 0; + int up = 0; + vector res; + for ( unsigned i=0; i<_config.size(); i++ ){ + BSONObj x; + try { + ScopedDbConnection conn( _config[i] ); + if ( ! 
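[editor's note] The new ConfigServer::init(string) above accepts the comma-separated --configdb value and hands a vector to the existing init; splitStringDelim lives in util/stringutils.h. An equivalent split, plus the joinStringDelim round-trip used later to rebuild the SYNC connection string, with plain iostreams:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main(){
    std::string s = "cfg1.example.com,cfg2.example.com,cfg3.example.com";

    std::vector<std::string> configdbs;           // ~ splitStringDelim( s, &configdbs, ',' )
    std::stringstream ss( s );
    for ( std::string part; std::getline( ss, part, ',' ); )
        configdbs.push_back( part );

    std::string full;                             // ~ joinStringDelim( configHosts, &full, ',' )
    for ( size_t i = 0; i < configdbs.size(); i++ )
        full += ( i ? "," : "" ) + configdbs[i];

    std::cout << configdbs.size() << " config servers -> " << full << "\n";
}
[end note]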
conn->simpleCommand( "config" , &x , "dbhash" ) ) + x = BSONObj(); + else { + x = x.getOwned(); + if ( up == 0 ) + firstGood = i; + up++; + } + conn.done(); + } + catch ( std::exception& ){ + log(LL_WARNING) << " couldn't check on config server:" << _config[i] << " ok for now" << endl; + } + res.push_back(x); + } + + if ( up == 0 ){ + errmsg = "no config servers reachable"; + return false; + } + + if ( up == 1 ){ + log( LL_WARNING ) << "only 1 config server reachable, continuing" << endl; + return true; + } + + BSONObj base = res[firstGood]; + for ( unsigned i=firstGood+1; imore() ); } else { - if ( conn.count( "config.shard" ) || conn.count( "config.databases" ) ){ + if ( conn.count( ShardNS::shard ) || conn.count( ShardNS::database ) ){ version = 1; } } @@ -448,29 +570,12 @@ return version; } - int ConfigServer::checkConfigVersion(){ - int cur = dbConfigVersion(); - if ( cur == VERSION ) - return 0; - - if ( cur == 0 ){ - ScopedDbConnection conn( _primary ); - conn->insert( "config.version" , BSON( "_id" << 1 << "version" << VERSION ) ); - pool.flush(); - assert( VERSION == dbConfigVersion( conn.conn() ) ); - conn.done(); - return 0; - } - - log() << "don't know how to upgrade " << cur << " to " << VERSION << endl; - return -8; - } - void ConfigServer::reloadSettings(){ set got; ScopedDbConnection conn( _primary ); - auto_ptr c = conn->query( "config.settings" , BSONObj() ); + auto_ptr c = conn->query( ShardNS::settings , BSONObj() ); + assert( c.get() ); while ( c->more() ){ BSONObj o = c->next(); string name = o["_id"].valuestrsafe(); @@ -479,21 +584,36 @@ log(1) << "MaxChunkSize: " << o["value"] << endl; Chunk::MaxChunkSize = o["value"].numberInt() * 1024 * 1024; } + else if ( name == "balancer" ){ + // ones we ignore here + } else { log() << "warning: unknown setting [" << name << "]" << endl; } } if ( ! 
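[editor's note] checkConfigServersConsistent above polls each config server with the dbhash command against the config database, tolerates unreachable servers, warns and continues when only one is up, and on a mismatch retries (tries - 1, after a sleep). The field-by-field comparison of the replies is garbled in this extraction, so the sketch below compares pre-collected hash strings, with an empty string meaning the server was unreachable:

#include <iostream>
#include <string>
#include <vector>

// Returns true when every reachable server reports the same hash.
bool consistent( const std::vector<std::string>& hashes, std::string& errmsg ){
    size_t up = 0, firstGood = 0;
    for ( size_t i = 0; i < hashes.size(); i++ )
        if ( !hashes[i].empty() && up++ == 0 )
            firstGood = i;                 // baseline, like res[firstGood] in the diff

    if ( up == 0 ){ errmsg = "no config servers reachable"; return false; }
    if ( up == 1 ){ std::cerr << "only 1 config server reachable, continuing\n"; return true; }

    for ( size_t i = firstGood + 1; i < hashes.size(); i++ ){
        if ( hashes[i].empty() || hashes[i] == hashes[firstGood] )
            continue;
        errmsg = "config servers " + std::to_string( firstGood ) + " and " +
                 std::to_string( i ) + " differ";   // the real code now retries tries-1 times
        return false;
    }
    return true;
}

int main(){
    std::string err;
    std::vector<std::string> hashes = { "ab12", "", "ab12" };   // middle server is down
    std::cout << ( consistent( hashes, err ) ? "consistent" : err ) << "\n";
}
[end note]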
got.count( "chunksize" ) ){ - conn->insert( "config.settings" , BSON( "_id" << "chunksize" << + conn->insert( ShardNS::settings , BSON( "_id" << "chunksize" << "value" << (Chunk::MaxChunkSize / ( 1024 * 1024 ) ) ) ); } + + + // indexes + try { + conn->ensureIndex( ShardNS::chunk , BSON( "ns" << 1 << "min" << 1 ) , true ); + conn->ensureIndex( ShardNS::chunk , BSON( "ns" << 1 << "shard" << 1 << "min" << 1 ) , true ); + conn->ensureIndex( ShardNS::chunk , BSON( "ns" << 1 << "lastmod" << 1 ) , true ); + conn->ensureIndex( ShardNS::shard , BSON( "host" << 1 ) , true ); + } + catch ( std::exception& e ){ + log( LL_WARNING ) << "couldn't create indexes on config db: " << e.what() << endl; + } conn.done(); } string ConfigServer::getHost( string name , bool withPort ){ - if ( name.find( ":" ) ){ + if ( name.find( ":" ) != string::npos ){ if ( withPort ) return name; return name.substr( 0 , name.find( ":" ) ); @@ -508,61 +628,43 @@ return name; } - ConfigServer configServer; - Grid grid; + void ConfigServer::logChange( const string& what , const string& ns , const BSONObj& detail ){ + assert( _primary.ok() ); - - class DBConfigUnitTest : public UnitTest { - public: - void testInOut( DBConfig& c , BSONObj o ){ - c.unserialize( o ); - BSONObjBuilder b; - c.serialize( b ); - - BSONObj out = b.obj(); - - if ( o.toString() == out.toString() ) - return; - - log() << "DBConfig serialization broken\n" - << "in : " << o.toString() << "\n" - << "out : " << out.toString() - << endl; - assert(0); - } - - void a(){ - BSONObjBuilder b; - b << "name" << "abc"; - b.appendBool( "partitioned" , true ); - b << "primary" << "myserver"; - - DBConfig c; - testInOut( c , b.obj() ); + static bool createdCapped = false; + static AtomicUInt num; + + ScopedDbConnection conn( _primary ); + + if ( ! createdCapped ){ + try { + conn->createCollection( "config.changelog" , 1024 * 1024 * 10 , true ); + } + catch ( UserException& e ){ + log(1) << "couldn't create changelog (like race condition): " << e << endl; + // don't care + } + createdCapped = true; } + + stringstream id; + id << getHostNameCached() << "-" << terseCurrentTime() << "-" << num++; + + BSONObj msg = BSON( "_id" << id.str() << "server" << getHostNameCached() << "time" << DATENOW << + "what" << what << "ns" << ns << "details" << detail ); + log() << "config change: " << msg << endl; - void b(){ - BSONObjBuilder b; - b << "name" << "abc"; - b.appendBool( "partitioned" , true ); - b << "primary" << "myserver"; - - BSONObjBuilder a; - a << "abc.foo" << fromjson( "{ 'key' : { 'a' : 1 } , 'unique' : false }" ); - a << "abc.bar" << fromjson( "{ 'key' : { 'kb' : -1 } , 'unique' : true }" ); - - b.appendArray( "sharded" , a.obj() ); - - DBConfig c; - testInOut( c , b.obj() ); - assert( c.isSharded( "abc.foo" ) ); - assert( ! 
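[editor's note] reloadSettings above stores the chunk size in config.settings in megabytes but keeps Chunk::MaxChunkSize in bytes, seeding the settings document from the in-memory value when it is missing; the two conversions must stay exact inverses. Spelled out (the 200 MB starting value is illustrative, not a claim about the default):

#include <iostream>

int main(){
    const long long MB = 1024 * 1024;

    long long maxChunkSizeBytes = 200 * MB;        // in-memory value, bytes

    // settings document missing -> write it, converted to MB:
    long long storedMB = maxChunkSizeBytes / MB;   // { _id: "chunksize", value: 200 }

    // settings document present -> convert back on load:
    maxChunkSizeBytes = storedMB * MB;             // Chunk::MaxChunkSize = value * 1024 * 1024

    std::cout << "value: " << storedMB << " MB = " << maxChunkSizeBytes << " bytes\n";
}
[end note]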
c.isSharded( "abc.food" ) ); + try { + conn->insert( "config.changelog" , msg ); } - - void run(){ - a(); - b(); + catch ( std::exception& e ){ + log() << "not logging config change: " << e.what() << endl; } - } dbConfigUnitTest; + conn.done(); + } + + DBConfigPtr configServerPtr (new ConfigServer()); + ConfigServer& configServer = dynamic_cast(*configServerPtr); + } diff -Nru mongodb-1.4.4/s/config.h mongodb-1.6.3/s/config.h --- mongodb-1.4.4/s/config.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/config.h 2010-09-24 10:02:42.000000000 -0700 @@ -27,33 +27,89 @@ #include "../client/dbclient.h" #include "../client/model.h" #include "shardkey.h" +#include "shard.h" namespace mongo { - - class Grid; - class ConfigServer; - - extern ConfigServer configServer; - extern Grid grid; - class ChunkManager; + struct ShardNS { + static string shard; + + static string database; + static string collection; + static string chunk; - class CollectionInfo { - public: - CollectionInfo( ShardKeyPattern _key = BSONObj() , bool _unique = false ) : - key( _key ) , unique( _unique ){} + static string mongos; + static string settings; + }; - ShardKeyPattern key; - bool unique; + /** + * Field names used in the 'shards' collection. + */ + struct ShardFields { + static BSONField draining; + static BSONField maxSize; + static BSONField currSize; }; + + class ConfigServer; + + class DBConfig; + typedef boost::shared_ptr DBConfigPtr; + + extern DBConfigPtr configServerPtr; + extern ConfigServer& configServer; + + class ChunkManager; + typedef shared_ptr ChunkManagerPtr; /** - * top level grid configuration for an entire database - * TODO: use shared_ptr for ChunkManager - */ - class DBConfig : public Model { + * top level configuration for a database + */ + class DBConfig { + + struct CollectionInfo { + CollectionInfo(){ + _dirty = false; + _dropped = false; + } + + CollectionInfo( DBConfig * db , const BSONObj& in ); + + bool isSharded() const { + return _cm.get(); + } + + ChunkManagerPtr getCM() const { + return _cm; + } + + void shard( DBConfig * db , const string& ns , const ShardKeyPattern& key , bool unique ); + void unshard(); + + bool isDirty() const { return _dirty; } + bool wasDropped() const { return _dropped; } + + void save( const string& ns , DBClientBase* conn ); + + + private: + ChunkManagerPtr _cm; + bool _dirty; + bool _dropped; + }; + + typedef map Collections; + public: - DBConfig( string name = "" ) : _name( name ) , _primary("") , _shardingEnabled(false){ } + + DBConfig( string name ) + : _name( name ) , + _primary("config","") , + _shardingEnabled(false), + _lock("DBConfig"){ + assert( name.size() ); + } + virtual ~DBConfig(){} string getName(){ return _name; }; @@ -65,50 +121,58 @@ } void enableSharding(); - ChunkManager* shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ); + ChunkManagerPtr shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ); /** - * @return whether or not this partition is partitioned + * @return whether or not the 'ns' collection is partitioned */ bool isSharded( const string& ns ); - ChunkManager* getChunkManager( const string& ns , bool reload = false ); + ChunkManagerPtr getChunkManager( const string& ns , bool reload = false ); /** * @return the correct for shard for the ns - * if the namespace is sharded, will return an empty string + * if the namespace is sharded, will return NULL */ - string getShard( const string& ns ); + const Shard& getShard( const string& ns ); - string getPrimary(){ - if ( 
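[editor's note] logChange above gives every config change a unique, roughly time-sortable _id of the form <hostname>-<terse time>-<counter> and writes it into the lazily created, capped (10 MB) config.changelog collection, swallowing insert errors so that logging can never break the operation being logged. The id scheme, with a caller-supplied clock string standing in for terseCurrentTime():

#include <atomic>
#include <iostream>
#include <sstream>
#include <string>

std::atomic<unsigned> num{ 0 };     // ~ static AtomicUInt num in the diff

std::string changeId( const std::string& host, const std::string& terseTime ){
    std::ostringstream id;
    id << host << "-" << terseTime << "-" << num++;
    return id.str();
}

int main(){
    // two changes logged in the same second still get distinct ids
    std::cout << changeId( "mongos1", "2010-09-24T10:02:42" ) << "\n"
              << changeId( "mongos1", "2010-09-24T10:02:42" ) << "\n";
}
[end note]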
_primary.size() == 0 ) - throw UserException( 8041 , (string)"no primary shard configured for db: " + _name ); + const Shard& getPrimary() const { + uassert( 8041 , (string)"no primary shard configured for db: " + _name , _primary.ok() ); return _primary; } - void setPrimary( string s ){ - _primary = s; - } + void setPrimary( string s ); + bool load(); bool reload(); - - bool dropDatabase( string& errmsg ); - virtual void save( bool check=true); + bool dropDatabase( string& errmsg ); - virtual string modelServer(); - // model stuff - virtual const char * getNS(){ return "config.databases"; } - virtual void serialize(BSONObjBuilder& to); - virtual void unserialize(const BSONObj& from); - + // lockless loading + void serialize(BSONObjBuilder& to); + + /** + * if i need save in new format + */ + bool unserialize(const BSONObj& from); + + void getAllShards(set& shards) const; + protected: - bool _dropShardedCollections( int& num, set& allServers , string& errmsg ); - - bool doload(); + /** + lockless + */ + bool _isSharded( const string& ns ); + + bool _dropShardedCollections( int& num, set& allServers , string& errmsg ); + + bool _load(); + bool _reload(); + void _save(); + /** @return true if there was sharding info to remove @@ -116,42 +180,17 @@ bool removeSharding( const string& ns ); string _name; // e.g. "alleyinsider" - string _primary; // e.g. localhost , mongo.foo.com:9999 + Shard _primary; // e.g. localhost , mongo.foo.com:9999 bool _shardingEnabled; - map _sharded; // { "alleyinsider.blog.posts" : { ts : 1 } , ... ] - all ns that are sharded - map _shards; // this will only have entries for things that have been looked at + //map _sharded; // { "alleyinsider.blog.posts" : { ts : 1 } , ... ] - all ns that are sharded + //map _shards; // this will only have entries for things that have been looked at - friend class Grid; - friend class ChunkManager; - }; + Collections _collections; - /** - * stores meta-information about the grid - * TODO: used shard_ptr for DBConfig pointers - */ - class Grid { - public: - /** - gets the config the db. - will return an empty DBConfig if not in db already - */ - DBConfig * getDBConfig( string ns , bool create=true); - - /** - * removes db entry. - * on next getDBConfig call will fetch from db - */ - void removeDB( string db ); + mongo::mutex _lock; // TODO: change to r/w lock ?? - string pickShardForNewDB(); - - bool knowAboutShard( string name ) const; - - unsigned long long getNextOpTime() const; - private: - map _databases; - mongo::mutex _lock; // TODO: change to r/w lock + friend class ChunkManager; }; class ConfigServer : public DBConfig { @@ -159,21 +198,20 @@ ConfigServer(); ~ConfigServer(); - - bool ok(){ - // TODO: check can connect - return _primary.size() > 0; - } + + bool ok( bool checkConsistency = false ); virtual string modelServer(){ - uassert( 10190 , "ConfigServer not setup" , _primary.size() ); - return _primary; + uassert( 10190 , "ConfigServer not setup" , _primary.ok() ); + return _primary.getConnString(); } /** call at startup, this will initiate connection to the grid db */ bool init( vector configHosts ); + + bool init( string s ); bool allUp(); bool allUp( string& errmsg ); @@ -186,12 +224,31 @@ /** * @return 0 = ok, otherwise error # */ - int checkConfigVersion(); + int checkConfigVersion( bool upgrade ); + /** + * log a change to config.changes + * @param what e.g. 
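[editor's note] The header above also shows the null-object convention used throughout: getShard returns a const Shard& rather than a pointer, so "no single shard" is expressed as the static Shard::EMPTY sentinel defined in config.cpp, and callers test ok() instead of comparing against NULL. (The doc comment still saying "will return NULL" appears to be a leftover from the old string-returning signature.) The pattern reduced to essentials:

#include <iostream>
#include <string>

class Shard {
public:
    static Shard EMPTY;                       // the null object, as in the diff
    Shard() {}
    explicit Shard( const std::string& name ) : _name( name ) {}
    bool ok() const { return !_name.empty(); }
    const std::string& getName() const { return _name; }
private:
    std::string _name;
};
Shard Shard::EMPTY;

const Shard& getShardFor( bool sharded ){
    static Shard primary( "shard0" );
    return sharded ? Shard::EMPTY : primary;  // a sharded ns has no single shard
}

int main(){
    const Shard& s = getShardFor( true );
    std::cout << ( s.ok() ? s.getName() : std::string( "no single shard" ) ) << "\n";
}
[end note]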
"split" , "migrate" + * @param msg any more info + */ + void logChange( const string& what , const string& ns , const BSONObj& detail = BSONObj() ); + + ConnectionString getConnectionString() const { + return ConnectionString( _primary.getConnString() , ConnectionString::SYNC ); + } + static int VERSION; + + /** + * check to see if all config servers have the same state + * will try tries time to make sure not catching in a bad state + */ + bool checkConfigServersConsistent( string& errmsg , int tries = 4 ) const; + private: string getHost( string name , bool withPort ); + vector _config; }; - + } // namespace mongo diff -Nru mongodb-1.4.4/s/config_migrate.cpp mongodb-1.6.3/s/config_migrate.cpp --- mongodb-1.4.4/s/config_migrate.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/config_migrate.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,196 @@ +// config_migrate.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../util/message.h" +#include "../util/unittest.h" +#include "../client/connpool.h" +#include "../client/model.h" +#include "../db/pdfile.h" +#include "../db/cmdline.h" + +#include "server.h" +#include "config.h" +#include "chunk.h" + +namespace mongo { + + int ConfigServer::checkConfigVersion( bool upgrade ){ + int cur = dbConfigVersion(); + if ( cur == VERSION ) + return 0; + + if ( cur == 0 ){ + ScopedDbConnection conn( _primary ); + conn->insert( "config.version" , BSON( "_id" << 1 << "version" << VERSION ) ); + pool.flush(); + assert( VERSION == dbConfigVersion( conn.conn() ) ); + conn.done(); + return 0; + } + + if ( cur == 2 ){ + + // need to upgrade + assert( VERSION == 3 ); + if ( ! 
upgrade ){ + log() << "newer version of mongo meta data\n" + << "need to --upgrade after shutting all mongos down" + << endl; + return -9; + } + + ScopedDbConnection conn( _primary ); + + // do a backup + string backupName; + { + stringstream ss; + ss << "config-backup-" << terseCurrentTime(false); + backupName = ss.str(); + } + log() << "backing up config to: " << backupName << endl; + conn->copyDatabase( "config" , backupName ); + + map hostToShard; + set shards; + // shards + { + unsigned n = 0; + auto_ptr c = conn->query( ShardNS::shard , BSONObj() ); + while ( c->more() ){ + BSONObj o = c->next(); + string host = o["host"].String(); + + string name = ""; + + BSONElement id = o["_id"]; + if ( id.type() == String ){ + name = id.String(); + } + else { + stringstream ss; + ss << "shard" << hostToShard.size(); + name = ss.str(); + } + + hostToShard[host] = name; + shards.insert( name ); + n++; + } + + assert( n == hostToShard.size() ); + assert( n == shards.size() ); + + conn->remove( ShardNS::shard , BSONObj() ); + + for ( map::iterator i=hostToShard.begin(); i != hostToShard.end(); i++ ){ + conn->insert( ShardNS::shard , BSON( "_id" << i->second << "host" << i->first ) ); + } + } + + // databases + { + auto_ptr c = conn->query( ShardNS::database , BSONObj() ); + map newDBs; + unsigned n = 0; + while ( c->more() ){ + BSONObj old = c->next(); + n++; + + if ( old["name"].eoo() ){ + // already done + newDBs[old["_id"].String()] = old; + continue; + } + + BSONObjBuilder b(old.objsize()); + b.appendAs( old["name"] , "_id" ); + + BSONObjIterator i(old); + while ( i.more() ){ + BSONElement e = i.next(); + if ( strcmp( "_id" , e.fieldName() ) == 0 || + strcmp( "name" , e.fieldName() ) == 0 ){ + continue; + } + + b.append( e ); + } + + BSONObj x = b.obj(); + log() << old << "\n\t" << x << endl; + newDBs[old["name"].String()] = x; + } + + assert( n == newDBs.size() ); + + conn->remove( ShardNS::database , BSONObj() ); + + for ( map::iterator i=newDBs.begin(); i!=newDBs.end(); i++ ){ + conn->insert( ShardNS::database , i->second ); + } + + } + + // chunks + { + unsigned num = 0; + map chunks; + auto_ptr c = conn->query( ShardNS::chunk , BSONObj() ); + while ( c->more() ){ + BSONObj x = c->next(); + BSONObjBuilder b; + + string id = Chunk::genID( x["ns"].String() , x["min"].Obj() ); + b.append( "_id" , id ); + + BSONObjIterator i(x); + while ( i.more() ){ + BSONElement e = i.next(); + if ( strcmp( e.fieldName() , "_id" ) == 0 ) + continue; + b.append( e ); + } + + BSONObj n = b.obj(); + log() << x << "\n\t" << n << endl; + chunks[id] = n; + num++; + } + + assert( num == chunks.size() ); + + conn->remove( ShardNS::chunk , BSONObj() ); + for ( map::iterator i=chunks.begin(); i!=chunks.end(); i++ ){ + conn->insert( ShardNS::chunk , i->second ); + } + + } + + conn->update( "config.version" , BSONObj() , BSON( "_id" << 1 << "version" << VERSION ) ); + conn.done(); + pool.flush(); + return 1; + } + + log() << "don't know how to upgrade " << cur << " to " << VERSION << endl; + return -8; + } + +} diff -Nru mongodb-1.4.4/s/cursors.cpp mongodb-1.6.3/s/cursors.cpp --- mongodb-1.4.4/s/cursors.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/cursors.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,10 +16,12 @@ */ -#include "stdafx.h" +#include "pch.h" #include "cursors.h" #include "../client/connpool.h" #include "../db/queryutil.h" +#include "../db/commands.h" +#include "../util/background.h" namespace mongo { @@ -35,11 +37,13 @@ _totalSent = 0; _done = false; - do { - // TODO: only create _id when 
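[editor's note] The checkConfigVersion rewrite in config_migrate.cpp above encodes the upgrade policy: a matching version is a no-op, a blank config DB (version 0) is stamped with the current VERSION, version 2 can be migrated to 3 but only when mongos was started with --upgrade (and only after a config-backup-<time> copy), and anything else is refused. Note that the 2-to-3 pass also rewrites every chunk _id to the deterministic Chunk::genID(ns, min), which makes re-running it idempotent. The decision table as a function; return codes follow the diff, 0 ok, 1 upgraded, -8/-9 errors:

#include <iostream>

const int VERSION = 3;

int checkConfigVersionSketch( int cur, bool upgrade ){
    if ( cur == VERSION ) return 0;   // up to date
    if ( cur == 0 )       return 0;   // fresh install: stamp config.version with VERSION
    if ( cur == 2 )                   // the one known migration path
        return upgrade ? 1            // run the 2 -> 3 migration (after a backup)
                       : -9;          // "need to --upgrade after shutting all mongos down"
    return -8;                        // "don't know how to upgrade"
}

int main(){
    std::cout << checkConfigVersionSketch( 3, false ) << " "
              << checkConfigVersionSketch( 2, false ) << " "
              << checkConfigVersionSketch( 2, true )  << "\n";   // 0 -9 1
}
[end note]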
needed - _id = security.getNonce(); - } while ( _id == 0 ); - + _id = 0; + + if ( q.queryOptions & QueryOption_NoCursorTimeout ){ + _lastAccessMillis = 0; + } + else + _lastAccessMillis = Listener::getElapsedTimeMillis(); } ShardedClientCursor::~ShardedClientCursor(){ @@ -48,6 +52,25 @@ _cursor = 0; } + long long ShardedClientCursor::getId(){ + if ( _id <= 0 ){ + _id = cursorCache.genId(); + assert( _id >= 0 ); + } + return _id; + } + + void ShardedClientCursor::accessed(){ + if ( _lastAccessMillis > 0 ) + _lastAccessMillis = Listener::getElapsedTimeMillis(); + } + + long long ShardedClientCursor::idleTime( long long now ){ + if ( _lastAccessMillis == 0 ) + return 0; + return now - _lastAccessMillis; + } + bool ShardedClientCursor::sendNextBatch( Request& r , int ntoreturn ){ uassert( 10191 , "cursor already done" , ! _done ); @@ -63,7 +86,7 @@ while ( _cursor->more() ){ BSONObj o = _cursor->next(); - b.append( (void*)o.objdata() , o.objsize() ); + b.appendBuf( (void*)o.objdata() , o.objsize() ); num++; if ( b.len() > maxSize ){ @@ -80,41 +103,189 @@ sendMore = false; break; } + + if ( ntoreturn == 0 && _totalSent == 0 && num > 100 ){ + // first batch should be max 100 unless batch size specified + break; + } } bool hasMore = sendMore && _cursor->more(); - log(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << _id << " totalSent: " << _totalSent << endl; + log(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl; - replyToQuery( 0 , r.p() , r.m() , b.buf() , b.len() , num , _totalSent , hasMore ? _id : 0 ); + replyToQuery( 0 , r.p() , r.m() , b.buf() , b.len() , num , _totalSent , hasMore ? getId() : 0 ); _totalSent += num; _done = ! hasMore; return hasMore; } + + // ---- CursorCache ----- + long long CursorCache::TIMEOUT = 600000; - CursorCache::CursorCache(){ + CursorCache::CursorCache() + :_mutex( "CursorCache" ), _shardedTotal(0){ } CursorCache::~CursorCache(){ // TODO: delete old cursors? 
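[editor's note] The ShardedClientCursor changes above replace eager nonce generation with a lazy id handed out by the cursor cache on first use, and track idleness with the convention that _lastAccessMillis == 0 means "never times out" (set when the query carried QueryOption_NoCursorTimeout). Both conventions in one small class, with a counter standing in for cursorCache.genId():

#include <iostream>

class CursorSketch {
public:
    explicit CursorSketch( bool noTimeout, long long nowMillis )
        : _id( 0 ), _lastAccessMillis( noTimeout ? 0 : nowMillis ) {}

    long long getId(){                        // lazy: allocated on first request
        if ( _id <= 0 )
            _id = ++_nextId;                  // ~ cursorCache.genId() in the diff
        return _id;
    }

    void accessed( long long nowMillis ){     // only ticks when timeouts are enabled
        if ( _lastAccessMillis > 0 )
            _lastAccessMillis = nowMillis;
    }

    long long idleTime( long long nowMillis ) const {
        if ( _lastAccessMillis == 0 )         // 0 == exempt from doTimeouts()
            return 0;
        return nowMillis - _lastAccessMillis;
    }

private:
    long long _id;
    long long _lastAccessMillis;
    static long long _nextId;
};
long long CursorSketch::_nextId = 0;

int main(){
    CursorSketch c( /*noTimeout=*/false, /*now=*/1000 );
    c.accessed( 5000 );
    std::cout << "id " << c.getId() << " idle " << c.idleTime( 8000 ) << "ms\n";   // 3000ms
}
[end note]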
+ int logLevel = 1; + if ( _cursors.size() || _refs.size() ) + logLevel = 0; + log( logLevel ) << " CursorCache at shutdown - " + << " sharded: " << _cursors.size() + << " passthrough: " << _refs.size() + << endl; } - ShardedClientCursor* CursorCache::get( long long id ){ - map::iterator i = _cursors.find( id ); + ShardedClientCursorPtr CursorCache::get( long long id ){ + scoped_lock lk( _mutex ); + MapSharded::iterator i = _cursors.find( id ); if ( i == _cursors.end() ){ OCCASIONALLY log() << "Sharded CursorCache missing cursor id: " << id << endl; - return 0; + return ShardedClientCursorPtr(); } + i->second->accessed(); return i->second; } - void CursorCache::store( ShardedClientCursor * cursor ){ + void CursorCache::store( ShardedClientCursorPtr cursor ){ + assert( cursor->getId() ); + scoped_lock lk( _mutex ); _cursors[cursor->getId()] = cursor; + _shardedTotal++; } void CursorCache::remove( long long id ){ + assert( id ); + scoped_lock lk( _mutex ); _cursors.erase( id ); } + void CursorCache::storeRef( const string& server , long long id ){ + assert( id ); + scoped_lock lk( _mutex ); + _refs[id] = server; + } + + long long CursorCache::genId(){ + while ( true ){ + long long x = security.getNonce(); + if ( x == 0 ) + continue; + if ( x < 0 ) + x *= -1; + + scoped_lock lk( _mutex ); + MapSharded::iterator i = _cursors.find( x ); + if ( i != _cursors.end() ) + continue; + + MapNormal::iterator j = _refs.find( x ); + if ( j != _refs.end() ) + continue; + + return x; + } + } + + void CursorCache::gotKillCursors(Message& m ){ + int *x = (int *) m.singleData()->_data; + x++; // reserved + int n = *x++; + + if ( n > 2000 ){ + log( n < 30000 ? LL_WARNING : LL_ERROR ) << "receivedKillCursors, n=" << n << endl; + } + + + uassert( 13286 , "sent 0 cursors to kill" , n >= 1 ); + uassert( 13287 , "too many cursors to kill" , n < 30000 ); + + long long * cursors = (long long *)x; + for ( int i=0; isecond; + _refs.erase( j ); + } + + assert( server.size() ); + ScopedDbConnection conn( server ); + conn->killCursor( id ); + conn.done(); + } + } + + void CursorCache::appendInfo( BSONObjBuilder& result ){ + scoped_lock lk( _mutex ); + result.append( "sharded" , (int)_cursors.size() ); + result.appendNumber( "shardedEver" , _shardedTotal ); + result.append( "refs" , (int)_refs.size() ); + result.append( "totalOpen" , (int)(_cursors.size() + _refs.size() ) ); + } + + void CursorCache::doTimeouts(){ + long long now = Listener::getElapsedTimeMillis(); + scoped_lock lk( _mutex ); + for ( MapSharded::iterator i=_cursors.begin(); i!=_cursors.end(); ++i ){ + long long idleFor = i->second->idleTime( now ); + if ( idleFor < TIMEOUT ){ + continue; + } + log() << "killing old cursor " << i->second->getId() << " idle for: " << idleFor << "ms" << endl; // TODO: make log(1) + _cursors.erase( i ); + } + } + CursorCache cursorCache; + + class CursorTimeoutThread : public PeriodicBackgroundJob { + public: + CursorTimeoutThread() : PeriodicBackgroundJob( 4000 ){} + virtual string name() { return "cursorTimeout"; } + virtual void runLoop(){ + cursorCache.doTimeouts(); + } + } cursorTimeoutThread; + + void CursorCache::startTimeoutThread(){ + cursorTimeoutThread.go(); + } + + class CmdCursorInfo : public Command { + public: + CmdCursorInfo() : Command( "cursorInfo", true ) {} + virtual bool slaveOk() const { return true; } + virtual void help( stringstream& help ) const { + help << " example: { cursorInfo : 1 }"; + } + virtual LockType locktype() const { return NONE; } + bool run(const string&, BSONObj& jsobj, string& errmsg, 
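[editor's note] CursorCache::genId above keeps drawing nonces until it finds one that is positive and collides with neither the sharded-cursor map nor the passthrough-ref map; ids must stay positive because ShardedClientCursor::getId treats values <= 0 as "not yet assigned". A standalone version against two maps, with std::mt19937_64 standing in for security.getNonce():

#include <iostream>
#include <map>
#include <random>
#include <string>

long long genIdSketch( const std::map<long long, int>& cursors,
                       const std::map<long long, std::string>& refs ){
    static std::mt19937_64 rng( 42 );         // ~ security.getNonce()
    while ( true ){
        long long x = static_cast<long long>( rng() );
        if ( x == 0 ) continue;
        if ( x < 0 )  x *= -1;                // keep ids positive, as in the diff
        if ( cursors.count( x ) || refs.count( x ) )
            continue;                         // already in use somewhere
        return x;
    }
}

int main(){
    std::map<long long, int> cursors;
    std::map<long long, std::string> refs;
    std::cout << "new cursor id: " << genIdSketch( cursors, refs ) << "\n";
}
[end note]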
BSONObjBuilder& result, bool fromRepl ){
+            cursorCache.appendInfo( result );
+            if ( jsobj["setTimeout"].isNumber() )
+                CursorCache::TIMEOUT = jsobj["setTimeout"].numberLong();
+            return true;
+        }
+    } cmdCursorInfo;
+
+}
diff -Nru mongodb-1.4.4/s/cursors.h mongodb-1.6.3/s/cursors.h
--- mongodb-1.4.4/s/cursors.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/cursors.h	2010-09-24 10:02:42.000000000 -0700
@@ -18,7 +18,7 @@
 #pragma once
-#include "../stdafx.h"
+#include "../pch.h"
 #include "../db/jsobj.h"
 #include "../db/dbmessage.h"
@@ -29,12 +29,12 @@
 namespace mongo {
-    class ShardedClientCursor {
+    class ShardedClientCursor : boost::noncopyable {
     public:
         ShardedClientCursor( QueryMessage& q , ClusteredCursor * cursor );
         virtual ~ShardedClientCursor();
-        long long getId(){ return _id; }
+        long long getId();
         /**
          * @return whether there is more data left
@@ -42,6 +42,10 @@
         bool sendNextBatch( Request& r ){ return sendNextBatch( r , _ntoreturn ); }
         bool sendNextBatch( Request& r , int ntoreturn );
+        void accessed();
+        /** @return idle time in ms */
+        long long idleTime( long long now );
+
     protected:
         ClusteredCursor * _cursor;
@@ -53,19 +57,44 @@
         bool _done;
         long long _id;
+        long long _lastAccessMillis; // 0 means no timeout
+
     };
+
+    typedef boost::shared_ptr<ShardedClientCursor> ShardedClientCursorPtr;
     class CursorCache {
     public:
+
+        static long long TIMEOUT;
+
+        typedef map<long long,ShardedClientCursorPtr> MapSharded;
+        typedef map<long long,string> MapNormal;
+
        CursorCache();
        ~CursorCache();
-        ShardedClientCursor * get( long long id );
-        void store( ShardedClientCursor* cursor );
+        ShardedClientCursorPtr get( long long id );
+        void store( ShardedClientCursorPtr cursor );
        void remove( long long id );
+        void storeRef( const string& server , long long id );
+
+        void gotKillCursors(Message& m );
+
+        void appendInfo( BSONObjBuilder& result );
+
+        long long genId();
+
+        void doTimeouts();
+        void startTimeoutThread();
     private:
-        map<long long,ShardedClientCursor*> _cursors;
+        mutex _mutex;
+
+        MapSharded _cursors;
+        MapNormal _refs;
+
+        long long _shardedTotal;
     };
     extern CursorCache cursorCache;
diff -Nru mongodb-1.4.4/s/dbgrid.vcproj mongodb-1.6.3/s/dbgrid.vcproj
--- mongodb-1.4.4/s/dbgrid.vcproj	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/dbgrid.vcproj	2010-09-24 10:02:42.000000000 -0700
@@ -41,13 +41,13 @@
[editor's note] The body of this Visual Studio 2008 project diff, and of the two VS2010 project files that follow, was XML whose markup did not survive extraction; only +/- element debris remained. The recoverable changes are summarized in one note below. [end note]
diff -Nru mongodb-1.4.4/s/dbgrid.vcxproj mongodb-1.6.3/s/dbgrid.vcxproj
--- mongodb-1.4.4/s/dbgrid.vcxproj	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/dbgrid.vcxproj	2010-09-24 10:02:42.000000000 -0700
@@ -1,201 +1,569 @@
diff -Nru mongodb-1.4.4/s/d_logic.cpp mongodb-1.6.3/s/d_logic.cpp
--- mongodb-1.4.4/s/d_logic.cpp	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/d_logic.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -22,500 +22,57 @@
    mostly around shard management and checking
  */
-#include "stdafx.h"
+#include "pch.h"
 #include <map>
 #include <string>
 #include "../db/commands.h"
 #include "../db/jsobj.h"
 #include "../db/dbmessage.h"
+#include "../db/query.h"
 #include "../client/connpool.h"
 #include "../util/queue.h"
+#include "shard.h"
+#include "d_logic.h"
+
 using namespace std;
 namespace mongo {
-
-    typedef map<string,unsigned long long> NSVersions;
-
-    NSVersions globalVersions;
-    boost::thread_specific_ptr<NSVersions> clientShardVersions;
-
-    string shardConfigServer;
-
-    boost::thread_specific_ptr<OID> clientServerIds;
-    map< string , BlockingQueue<BSONObj>* > clientQueues;
-
-    unsigned long long getVersion( BSONElement e , string& errmsg ){
-        if ( e.eoo() ){
-            errmsg = "no version";
-            return 0;
-        }
-
-        if ( e.isNumber() )
-            return (unsigned long long)e.number();
-
-        if ( e.type() == Date || e.type() == Timestamp )
-            return e._numberLong();
-
-
-        errmsg = "version is not a numberic type";
-        return 0;
-    }
-
-    class MongodShardCommand : public Command {
-    public:
-        MongodShardCommand( const char * n ) : Command( n ){
-        }
-        virtual bool slaveOk(){
-            return false;
-        }
-        virtual bool adminOnly() {
-            return true;
-        }
-    };
-
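The removed listener below parks one mongod thread per mongos on BlockingQueue<BSONObj>::blockingPop() until a failed write is queued for that mongos. A minimal sketch of such a queue using the standard library (the real mongo::BlockingQueue in util/queue.h differs in detail):

    #include <condition_variable>
    #include <mutex>
    #include <queue>

    // Minimal producer/consumer queue in the spirit of util/queue.h's BlockingQueue.
    template <typename T>
    class BlockingQueue {
    public:
        void push(T t) {
            std::lock_guard<std::mutex> lk(_m);
            _q.push(std::move(t));
            _cv.notify_one();
        }

        // Blocks until an element is available, then removes and returns it.
        T blockingPop() {
            std::unique_lock<std::mutex> lk(_m);
            _cv.wait(lk, [this] { return !_q.empty(); });
            T t = std::move(_q.front());
            _q.pop();
            return t;
        }
    private:
        std::mutex _m;
        std::condition_variable _cv;
        std::queue<T> _q;
    };

With one queue per mongos serverID, the writebacklisten command reduces to blockingPop() on the caller's queue, and queueWriteBack() (introduced later in this diff) is just push().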
-    class WriteBackCommand : public MongodShardCommand {
-    public:
-        virtual LockType locktype(){ return NONE; }
-        WriteBackCommand() : MongodShardCommand( "writebacklisten" ){}
-        bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
-
-            BSONElement e = cmdObj.firstElement();
-            if ( e.type() != jstOID ){
-                errmsg = "need oid as first value";
-                return 0;
-            }
-
-            const OID id = e.__oid();
-
-            if ( ! clientQueues[id.str()] )
-                clientQueues[id.str()] = new BlockingQueue<BSONObj>();
-
-            BSONObj z = clientQueues[id.str()]->blockingPop();
-            log(1) << "WriteBackCommand got : " << z << endl;
-
-            result.append( "data" , z );
-
-            return true;
-        }
-    } writeBackCommand;
-
-    // setShardVersion( ns )
-
-    class SetShardVersion : public MongodShardCommand {
-    public:
-        SetShardVersion() : MongodShardCommand("setShardVersion"){}
-
-        virtual void help( stringstream& help ) const {
-            help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } ";
-        }
-
-        virtual LockType locktype(){ return WRITE; } // TODO: figure out how to make this not need to lock
-
-        bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
-
-            bool authoritative = cmdObj.getBoolField( "authoritative" );
-
-            string configdb = cmdObj["configdb"].valuestrsafe();
-            { // configdb checking
-                if ( configdb.size() == 0 ){
-                    errmsg = "no configdb";
-                    return false;
-                }
-
-                if ( shardConfigServer.size() == 0 ){
-                    if ( ! authoritative ){
-                        result.appendBool( "need_authoritative" , true );
-                        errmsg = "first setShardVersion";
-                        return false;
-                    }
-                    shardConfigServer = configdb;
-                }
-                else if ( shardConfigServer != configdb ){
-                    errmsg = "specified a different configdb!";
-                    return false;
-                }
-            }
-
-            { // setting up ids
-                if ( cmdObj["serverID"].type() != jstOID ){
-                    // TODO: fix this
-                    //errmsg = "need serverID to be an OID";
-                    //return 0;
-                }
-                else {
-                    OID clientId = cmdObj["serverID"].__oid();
-                    if ( ! clientServerIds.get() ){
-                        string s = clientId.str();
-
-                        OID * nid = new OID();
-                        nid->init( s );
-                        clientServerIds.reset( nid );
-
-                        if ( ! clientQueues[s] )
-                            clientQueues[s] = new BlockingQueue<BSONObj>();
-                    }
-                    else if ( clientId != *clientServerIds.get() ){
-                        errmsg = "server id has changed!";
-                        return 0;
-                    }
-                }
-            }
-
-            unsigned long long version = getVersion( cmdObj["version"] , errmsg );
-            if ( errmsg.size() ){
-                return false;
-            }
-
-            NSVersions * versions = clientShardVersions.get();
-
-            if ( ! versions ){
-                log(1) << "entering shard mode for connection" << endl;
-                versions = new NSVersions();
-                clientShardVersions.reset( versions );
-            }
-
-            string ns = cmdObj["setShardVersion"].valuestrsafe();
-            if ( ns.size() == 0 ){
-                errmsg = "need to speciy fully namespace";
-                return false;
-            }
-
-            unsigned long long& oldVersion = (*versions)[ns];
-            unsigned long long& globalVersion = globalVersions[ns];
-
-            if ( version == 0 && globalVersion == 0 ){
-                // this connection is cleaning itself
-                oldVersion = 0;
-                return 1;
-            }
-
-            if ( version == 0 && globalVersion > 0 ){
-                if ( ! authoritative ){
-                    result.appendBool( "need_authoritative" , true );
-                    result.appendTimestamp( "globalVersion" , globalVersion );
-                    result.appendTimestamp( "oldVersion" , oldVersion );
-                    errmsg = "dropping needs to be authoritative";
-                    return 0;
-                }
-                log() << "wiping data for: " << ns << endl;
-                result.appendTimestamp( "beforeDrop" , globalVersion );
-                // only setting global version on purpose
-                // need clients to re-find meta-data
-                globalVersion = 0;
-                oldVersion = 0;
-                return 1;
-            }
-
-            if ( version < oldVersion ){
-                errmsg = "you already have a newer version";
-                result.appendTimestamp( "oldVersion" , oldVersion );
-                result.appendTimestamp( "newVersion" , version );
-                return false;
-            }
-
-            if ( version < globalVersion ){
-                errmsg = "going to older version for global";
-                return false;
-            }
-
-            if ( globalVersion == 0 && !
cmdObj.getBoolField( "authoritative" ) ){ - // need authoritative for first look - result.appendBool( "need_authoritative" , true ); - result.append( "ns" , ns ); - errmsg = "first time for this ns"; - return false; - } - - result.appendTimestamp( "oldVersion" , oldVersion ); - oldVersion = version; - globalVersion = version; - - result.append( "ok" , 1 ); - return 1; - } - - } setShardVersion; - - class GetShardVersion : public MongodShardCommand { - public: - GetShardVersion() : MongodShardCommand("getShardVersion"){} - - virtual void help( stringstream& help ) const { - help << " example: { getShardVersion : 'alleyinsider.foo' } "; - } - - virtual LockType locktype(){ return WRITE; } // TODO: figure out how to make this not need to lock - - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string ns = cmdObj["getShardVersion"].valuestrsafe(); - if ( ns.size() == 0 ){ - errmsg = "need to speciy fully namespace"; - return false; - } - - result.append( "configServer" , shardConfigServer.c_str() ); - - result.appendTimestamp( "global" , globalVersions[ns] ); - if ( clientShardVersions.get() ) - result.appendTimestamp( "mine" , (*clientShardVersions.get())[ns] ); - else - result.appendTimestamp( "mine" , 0 ); - - return true; - } - - } getShardVersion; - - class MoveShardStartCommand : public MongodShardCommand { - public: - MoveShardStartCommand() : MongodShardCommand( "movechunk.start" ){} - virtual void help( stringstream& help ) const { - help << "should not be calling this directly" << endl; - } - - virtual LockType locktype(){ return WRITE; } - - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - // so i have to start clone, tell caller its ok to make change - // at this point the caller locks me, and updates config db - // then finish calls finish, and then deletes data when cursors are done - - string ns = cmdObj["movechunk.start"].valuestrsafe(); - string to = cmdObj["to"].valuestrsafe(); - string from = cmdObj["from"].valuestrsafe(); // my public address, a tad redundant, but safe - BSONObj filter = cmdObj.getObjectField( "filter" ); - - if ( ns.size() == 0 ){ - errmsg = "need to specify namespace in command"; - return false; - } - - if ( to.size() == 0 ){ - errmsg = "need to specify server to move shard to"; - return false; - } - if ( from.size() == 0 ){ - errmsg = "need to specify server to move shard from (redundat i know)"; - return false; - } - - if ( filter.isEmpty() ){ - errmsg = "need to specify a filter"; - return false; - } - - log() << "got movechunk.start: " << cmdObj << endl; - - - BSONObj res; - bool ok; - - { - dbtemprelease unlock; - - ScopedDbConnection conn( to ); - ok = conn->runCommand( "admin" , - BSON( "startCloneCollection" << ns << - "from" << from << - "query" << filter - ) , - res ); - conn.done(); - } - - log() << " movechunk.start res: " << res << endl; - - if ( ok ){ - result.append( res["finishToken"] ); - } - else { - errmsg = "startCloneCollection failed: "; - errmsg += res["errmsg"].valuestrsafe(); - } - return ok; - } - - } moveShardStartCmd; - - class MoveShardFinishCommand : public MongodShardCommand { - public: - MoveShardFinishCommand() : MongodShardCommand( "movechunk.finish" ){} - virtual void help( stringstream& help ) const { - help << "should not be calling this directly" << endl; - } - - virtual LockType locktype(){ return WRITE; } - - bool run(const char *cmdns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - // see 
MoveShardStartCommand::run - - string ns = cmdObj["movechunk.finish"].valuestrsafe(); - if ( ns.size() == 0 ){ - errmsg = "need ns as cmd value"; - return false; - } - - string to = cmdObj["to"].valuestrsafe(); - if ( to.size() == 0 ){ - errmsg = "need to specify server to move shard to"; - return false; - } - - - unsigned long long newVersion = getVersion( cmdObj["newVersion"] , errmsg ); - if ( newVersion == 0 ){ - errmsg = "have to specify new version number"; - return false; - } - - BSONObj finishToken = cmdObj.getObjectField( "finishToken" ); - if ( finishToken.isEmpty() ){ - errmsg = "need finishToken"; - return false; - } - - if ( ns != finishToken["collection"].valuestrsafe() ){ - errmsg = "namespaced don't match"; - return false; - } - - // now we're locked - globalVersions[ns] = newVersion; - NSVersions * versions = clientShardVersions.get(); - if ( ! versions ){ - versions = new NSVersions(); - clientShardVersions.reset( versions ); - } - (*versions)[ns] = newVersion; - - BSONObj res; - bool ok; - - { - dbtemprelease unlock; - - ScopedDbConnection conn( to ); - ok = conn->runCommand( "admin" , - BSON( "finishCloneCollection" << finishToken ) , - res ); - conn.done(); - } - - if ( ! ok ){ - // uh oh - errmsg = "finishCloneCollection failed!"; - result << "finishError" << res; - return false; - } - - // wait until cursors are clean - cout << "WARNING: deleting data before ensuring no more cursors TODO" << endl; - - dbtemprelease unlock; - - DBDirectClient client; - BSONObj removeFilter = finishToken.getObjectField( "query" ); - client.remove( ns , removeFilter ); - return true; - } - - } moveShardFinishCmd; - - bool haveLocalShardingInfo( const string& ns ){ - if ( shardConfigServer.empty() ) + bool handlePossibleShardedMessage( Message &m, DbResponse* dbresponse ){ + if ( ! shardingState.enabled() ) return false; - - unsigned long long version = globalVersions[ns]; - if ( version == 0 ) - return false; - - NSVersions * versions = clientShardVersions.get(); - if ( ! versions ) + int op = m.operation(); + if ( op < 2000 + || op >= 3000 + || op == dbGetMore // cursors are weird + ) return false; - return true; - } - - /** - * @ return true if not in sharded mode - or if version for this client is ok - */ - bool shardVersionOk( const string& ns , string& errmsg ){ - if ( shardConfigServer.empty() ){ - return true; - } - - NSVersions::iterator i = globalVersions.find( ns ); - if ( i == globalVersions.end() ) - return true; - - NSVersions * versions = clientShardVersions.get(); - if ( ! 
versions ){ - // this means the client has nothing sharded - // so this allows direct connections to do whatever they want - // which i think is the correct behavior - return true; - } - - unsigned long long clientVersion = (*versions)[ns]; - unsigned long long version = i->second; - - if ( version == 0 && clientVersion > 0 ){ - stringstream ss; - ss << "version: " << version << " clientVersion: " << clientVersion; - errmsg = ss.str(); - return false; - } - - if ( clientVersion >= version ) - return true; - - - if ( clientVersion == 0 ){ - errmsg = "client in sharded mode, but doesn't have version set for this collection"; - return false; - } - - errmsg = (string)"your version is too old ns: " + ns; - return false; - } - - - bool handlePossibleShardedMessage( Message &m, DbResponse &dbresponse ){ - - if ( shardConfigServer.empty() ){ - return false; - } - - int op = m.data->operation(); - if ( op < 2000 || op >= 3000 ) - return false; - - - const char *ns = m.data->_data + 4; + DbMessage d(m); + const char *ns = d.getns(); string errmsg; - if ( shardVersionOk( ns , errmsg ) ){ + if ( shardVersionOk( ns , opIsWrite( op ) , errmsg ) ){ return false; } - log() << "shardVersionOk failed ns:" << ns << " " << errmsg << endl; + log(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl; if ( doesOpGetAResponse( op ) ){ + assert( dbresponse ); BufBuilder b( 32768 ); b.skip( sizeof( QueryResult ) ); { BSONObj obj = BSON( "$err" << errmsg ); - b.append( obj.objdata() , obj.objsize() ); + b.appendBuf( obj.objdata() , obj.objsize() ); } QueryResult *qr = (QueryResult*)b.buf(); - qr->_resultFlags() = QueryResult::ResultFlag_ErrSet | QueryResult::ResultFlag_ShardConfigStale; + qr->_resultFlags() = ResultFlag_ErrSet | ResultFlag_ShardConfigStale; qr->len = b.len(); qr->setOperation( opReply ); qr->cursorId = 0; @@ -526,22 +83,30 @@ Message * resp = new Message(); resp->setData( qr , true ); - dbresponse.response = resp; - dbresponse.responseTo = m.data->id; + dbresponse->response = resp; + dbresponse->responseTo = m.header()->id; return true; } - OID * clientID = clientServerIds.get(); - massert( 10422 , "write with bad shard config and no server id!" , clientID ); + OID writebackID; + writebackID.init(); + lastError.getSafe()->writeback( writebackID ); + + const OID& clientID = ShardedConnectionInfo::get(false)->getID(); + massert( 10422 , "write with bad shard config and no server id!" 
, clientID.isSet() );
 
-        log() << "got write with an old config - writing back" << endl;
+        log(1) << "got write with an old config - writing back ns: " << ns << endl;
+        if ( logLevel ) log(1) << debugString( m ) << endl;
 
         BSONObjBuilder b;
         b.appendBool( "writeBack" , true );
         b.append( "ns" , ns );
-        b.appendBinData( "msg" , m.data->len , bdtCustom , (char*)(m.data) );
-        log() << "writing back msg with len: " << m.data->len << " op: " << m.data->_operation << endl;
-        clientQueues[clientID->str()]->push( b.obj() );
+        b.append( "id" , writebackID );
+        b.appendTimestamp( "version" , shardingState.getVersion( ns ) );
+        b.appendTimestamp( "yourVersion" , ShardedConnectionInfo::get( true )->getVersion( ns ) );
+        b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) );
+        log(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
+        queueWriteBack( clientID.str() , b.obj() );
 
         return true;
     }
diff -Nru mongodb-1.4.4/s/d_logic.h mongodb-1.6.3/s/d_logic.h
--- mongodb-1.4.4/s/d_logic.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/s/d_logic.h	2010-09-24 10:02:42.000000000 -0700
@@ -18,10 +18,135 @@
 #pragma once
-#include "../stdafx.h"
+#include "../pch.h"
+#include "../db/jsobj.h"
+#include "util.h"
 namespace mongo {
+
+    class ShardingState;
+
+    typedef ShardChunkVersion ConfigVersion;
+    typedef map<string,ConfigVersion> NSVersionMap;
+
+    // -----------
+
+    class ChunkMatcher {
+        typedef map<BSONObj,pair<BSONObj,BSONObj>,BSONObjCmp> MyMap;
+    public:
+
+        bool belongsToMe( const BSONObj& key , const DiskLoc& loc ) const;
+
+    private:
+        ChunkMatcher( ConfigVersion version );
+
+        void gotRange( const BSONObj& min , const BSONObj& max );
+
+        ConfigVersion _version;
+        BSONObj _key;
+        MyMap _map;
+
+        friend class ShardingState;
+    };
+
+    typedef shared_ptr<ChunkMatcher> ChunkMatcherPtr;
+
+    // --------------
+    // --- global state ---
+    // --------------
+
+    class ShardingState {
+    public:
+        ShardingState();
+
+        bool enabled() const { return _enabled; }
+        const string& getConfigServer() const { return _configServer; }
+        void enable( const string& server );
+
+        void gotShardName( const string& name );
+        void gotShardHost( const string& host );
+
+        bool hasVersion( const string& ns );
+        bool hasVersion( const string& ns , ConfigVersion& version );
+        ConfigVersion& getVersion( const string& ns ); // TODO: this is dangerous
+        void setVersion( const string& ns , const ConfigVersion& version );
+
+        void appendInfo( BSONObjBuilder& b );
+
+        ChunkMatcherPtr getChunkMatcher( const string& ns );
+
+        bool inCriticalMigrateSection();
+    private:
+
+        bool _enabled;
+
+        string _configServer;
+
+        string _shardName;
+        string _shardHost;
+
+        mongo::mutex _mutex;
+        NSVersionMap _versions;
+        map<string,ChunkMatcherPtr> _chunks;
+    };
+
+    extern ShardingState shardingState;
+
+    // --------------
+    // --- per connection ---
+    // --------------
+
+    class ShardedConnectionInfo {
+    public:
+        ShardedConnectionInfo();
+
+        const OID& getID() const { return _id; }
+        bool hasID() const { return _id.isSet(); }
+        void setID( const OID& id );
+
+        ConfigVersion& getVersion( const string& ns ); // TODO: this is dangerous
+        void setVersion( const string& ns , const ConfigVersion& version );
+
+        static ShardedConnectionInfo* get( bool create );
+        static void reset();
+
+        bool inForceMode() const {
+            return _forceMode;
+        }
+
+        void enterForceMode(){ _forceMode = true; }
+        void leaveForceMode(){ _forceMode = false; }
+
+    private:
+
+        OID _id;
+        NSVersionMap _versions;
+        bool _forceMode;
+
+        static boost::thread_specific_ptr<ShardedConnectionInfo> _tl;
+    };
+
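ShardedConnectionInfo's static get(bool create) is a lazy, per-thread get-or-create: each connection-servicing thread builds its own info object, so reads and updates of per-connection shard versions need no locking. A minimal sketch of the same pattern (the diff uses boost::thread_specific_ptr; C++11 thread_local is the closest standard equivalent, and ConnectionInfo here is a stand-in type):

    #include <memory>

    struct ConnectionInfo {
        bool forceMode = false;
    };

    ConnectionInfo* getConnectionInfo(bool create) {
        // One slot per thread, destroyed automatically when the thread exits.
        thread_local std::unique_ptr<ConnectionInfo> info;
        if (!info && create)
            info.reset(new ConnectionInfo()); // first touch: enter "shard mode"
        return info.get();
    }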
+    struct ShardForceModeBlock {
+        ShardForceModeBlock(){
+            info = ShardedConnectionInfo::get( false );
+            if ( info )
+                info->enterForceMode();
+        }
+        ~ShardForceModeBlock(){
+            if ( info )
+                info->leaveForceMode();
+        }
+
+        ShardedConnectionInfo * info;
+    };
+
+    // -----------------
+    // --- core ---
+    // -----------------
+    unsigned long long extractVersion( BSONElement e , string& errmsg );
+
+
     /**
      * @return true if we have any shard info for the ns
      */
@@ -30,10 +155,20 @@
     /**
      * @return true if the current threads shard version is ok, or not in sharded version
      */
-    bool shardVersionOk( const string& ns , string& errmsg );
+    bool shardVersionOk( const string& ns , bool write , string& errmsg );
 
     /**
      * @return true if we took care of the message and nothing else should be done
      */
-    bool handlePossibleShardedMessage( Message &m, DbResponse &dbresponse );
+    bool handlePossibleShardedMessage( Message &m, DbResponse * dbresponse );
+
+    void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt );
+
+    // -----------------
+    // --- writeback ---
+    // -----------------
+
+    /* queue a write back on a remote server for a failed write */
+    void queueWriteBack( const string& remote , const BSONObj& o );
+
 }
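ShardForceModeBlock is scope-based: internal operations (for example the post-migration delete in OldDataCleanup::doRemove below) bypass shard-version filtering for exactly one scope, and force mode is restored even if an exception is thrown. A minimal sketch of the same RAII idea, reusing the ConnectionInfo stand-in from the previous sketch:

    struct ConnectionInfo { bool forceMode = false; }; // as in the previous sketch

    struct ForceModeBlock {
        explicit ForceModeBlock(ConnectionInfo* i) : info(i) {
            if (info) info->forceMode = true;   // enter force mode
        }
        ~ForceModeBlock() {
            if (info) info->forceMode = false;  // always restored, even on throw
        }
        ConnectionInfo* info;
    };

    void removeMigratedRange(ConnectionInfo* info) {
        ForceModeBlock force(info); // force mode held only for this scope
        // ... delete documents that now belong to another shard ...
    }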
diff -Nru mongodb-1.4.4/s/d_migrate.cpp mongodb-1.6.3/s/d_migrate.cpp
--- mongodb-1.4.4/s/d_migrate.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/s/d_migrate.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,984 @@
+// d_migrate.cpp
+
+/**
+*    Copyright (C) 2008 10gen Inc.
+*
+*    This program is free software: you can redistribute it and/or modify
+*    it under the terms of the GNU Affero General Public License, version 3,
+*    as published by the Free Software Foundation.
+*
+*    This program is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*    GNU Affero General Public License for more details.
+*
+*    You should have received a copy of the GNU Affero General Public License
+*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+
+/**
+   these are commands that live in mongod
+   mostly around shard management and checking
+ */
+
+#include "pch.h"
+#include <map>
+#include <string>
+
+#include "../db/commands.h"
+#include "../db/jsobj.h"
+#include "../db/dbmessage.h"
+#include "../db/query.h"
+#include "../db/cmdline.h"
+
+#include "../client/connpool.h"
+#include "../client/distlock.h"
+
+#include "../util/queue.h"
+#include "../util/unittest.h"
+
+#include "shard.h"
+#include "d_logic.h"
+#include "config.h"
+#include "chunk.h"
+
+using namespace std;
+
+namespace mongo {
+
+    class MoveTimingHelper {
+    public:
+        MoveTimingHelper( const string& where , const string& ns )
+            : _where( where ) , _ns( ns ){
+            _next = 1;
+        }
+
+        ~MoveTimingHelper(){
+            configServer.logChange( (string)"moveChunk." + _where , _ns, _b.obj() );
+        }
+
+        void done( int step ){
+            assert( step == _next++ );
+
+            stringstream ss;
+            ss << "step" << step;
+            string s = ss.str();
+
+            CurOp * op = cc().curop();
+            if ( op )
+                op->setMessage( s.c_str() );
+            else
+                log( LL_WARNING ) << "op is null in MoveTimingHelper::done" << endl;
+
+            _b.appendNumber( s , _t.millis() );
+            _t.reset();
+        }
+
+
+    private:
+        Timer _t;
+
+        string _where;
+        string _ns;
+
+        int _next;
+
+        BSONObjBuilder _b;
+    };
+
+    struct OldDataCleanup {
+        string ns;
+        BSONObj min;
+        BSONObj max;
+        set<CursorId> initial;
+        void doRemove(){
+            ShardForceModeBlock sf;
+            writelock lk(ns);
+            RemoveSaver rs("moveChunk",ns,"post-cleanup");
+            long long num = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? &rs : 0 );
+            log() << "moveChunk deleted: " << num << endl;
+        }
+    };
+
+    void _cleanupOldData( OldDataCleanup cleanup ){
+        Client::initThread( "cleanupOldData");
+        log() << " (start) waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << "  # cursors:" << cleanup.initial.size() << endl;
+
+        int loops = 0;
+        Timer t;
+        while ( t.seconds() < 900 ){ // 15 minutes
+            assert( dbMutex.getState() == 0 );
+            sleepmillis( 20 );
+
+            set<CursorId> now;
+            ClientCursor::find( cleanup.ns , now );
+
+            set<CursorId> left;
+            for ( set<CursorId>::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ){
+                CursorId id = *i;
+                if ( now.count(id) )
+                    left.insert( id );
+            }
+
+            if ( left.size() == 0 )
+                break;
+            cleanup.initial = left;
+
+            if ( ( loops++ % 200 ) == 0 ){
+                log() << " (looping " << loops << ") waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << "  # cursors:" << cleanup.initial.size() << endl;
+
+                stringstream ss;
+                for ( set<CursorId>::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ){
+                    CursorId id = *i;
+                    ss << id << " ";
+                }
+                log() << " cursors: " << ss.str() << endl;
+            }
+        }
+
+        cleanup.doRemove();
+
+        cc().shutdown();
+    }
+
+    void cleanupOldData( OldDataCleanup cleanup ){
+        try {
+            _cleanupOldData( cleanup );
+        }
+        catch ( std::exception& e ){
+            log() << " error cleaning old data:" << e.what() << endl;
+        }
+        catch ( ... ){
+            log() << " unknown error cleaning old data" << endl;
+        }
+    }
+
+    class ChunkCommandHelper : public Command {
+    public:
+        ChunkCommandHelper( const char * name )
+            : Command( name ){
+        }
+
+        virtual void help( stringstream& help ) const {
+            help << "internal should not be calling this directly" << endl;
+        }
+        virtual bool slaveOk() const { return false; }
+        virtual bool adminOnly() const { return true; }
+        virtual LockType locktype() const { return NONE; }
+
+    };
+
+    bool isInRange( const BSONObj& obj , const BSONObj& min , const BSONObj& max ){
+        BSONObj k = obj.extractFields( min, true );
+
+        return k.woCompare( min ) >= 0 && k.woCompare( max ) < 0;
+    }
+
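The shape of the wait in _cleanupOldData above is worth isolating: start from the set of cursor ids open when the chunk moved, poll until they all close or a deadline passes, then delete the range regardless. A standalone sketch of that pattern (identifiers and the callables are illustrative, not the mongo API):

    #include <chrono>
    #include <set>
    #include <thread>

    using CursorId = long long;

    // Keep only cursors from the original set that are still open now.
    std::set<CursorId> stillOpen(const std::set<CursorId>& initial,
                                 const std::set<CursorId>& nowOpen) {
        std::set<CursorId> left;
        for (CursorId id : initial)
            if (nowOpen.count(id))
                left.insert(id);
        return left;
    }

    template <typename NowOpenFn, typename RemoveFn>
    void waitThenRemove(std::set<CursorId> initial, NowOpenFn nowOpen, RemoveFn doRemove) {
        auto deadline = std::chrono::steady_clock::now() + std::chrono::minutes(15);
        while (!initial.empty() && std::chrono::steady_clock::now() < deadline) {
            std::this_thread::sleep_for(std::chrono::milliseconds(20));
            initial = stillOpen(initial, nowOpen());
        }
        doRemove(); // proceed even if stragglers remain, matching the 15-minute cap
    }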
+
+    class MigrateFromStatus {
+    public:
+
+        MigrateFromStatus()
+            : _mutex( "MigrateFromStatus" ){
+            _active = false;
+            _inCriticalSection = false;
+        }
+
+        void start( string ns , const BSONObj& min , const BSONObj& max ){
+            assert( ! _active );
+
+            assert( ! min.isEmpty() );
+            assert( ! max.isEmpty() );
+            assert( ns.size() );
+
+            _ns = ns;
+            _min = min;
+            _max = max;
+
+            _deleted.clear();
+            _reload.clear();
+
+            _active = true;
+        }
+
+        void done(){
+            if ( ! _active )
+                return;
+            _active = false;
+            _inCriticalSection = false;
+
+            scoped_lock lk( _mutex );
+            _deleted.clear();
+            _reload.clear();
+        }
+
+        void logOp( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ){
+            if ( ! _active )
+                return;
+
+            if ( _ns != ns )
+                return;
+
+            char op = opstr[0];
+            if ( op == 'n' || op =='c' || ( op == 'd' && opstr[1] == 'b' ) )
+                return;
+
+            BSONElement ide;
+            if ( patt )
+                ide = patt->getField( "_id" );
+            else
+                ide = obj["_id"];
+
+            if ( ide.eoo() ){
+                log( LL_WARNING ) << "logOpForSharding got mod with no _id, ignoring  obj: " << obj << endl;
+                return;
+            }
+
+            BSONObj it;
+
+            switch ( opstr[0] ){
+
+            case 'd': {
+                // can't filter deletes :(
+                scoped_lock lk( _mutex );
+                _deleted.push_back( ide.wrap() );
+                return;
+            }
+
+            case 'i':
+                it = obj;
+                break;
+
+            case 'u':
+                if ( ! Helpers::findById( cc() , _ns.c_str() , ide.wrap() , it ) ){
+                    log( LL_WARNING ) << "logOpForSharding couldn't find: " << ide << " even though should have" << endl;
+                    return;
+                }
+                break;
+
+            }
+
+            if ( ! isInRange( it , _min , _max ) )
+                return;
+
+            scoped_lock lk( _mutex );
+            _reload.push_back( ide.wrap() );
+        }
+
+        void xfer( list<BSONObj> * l , BSONObjBuilder& b , const char * name , long long& size , bool explode ){
+            static long long maxSize = 1024 * 1024;
+
+            if ( l->size() == 0 || size > maxSize )
+                return;
+
+            BSONArrayBuilder arr(b.subarrayStart(name));
+
+            list<BSONObj>::iterator i = l->begin();
+
+            while ( i != l->end() && size < maxSize ){
+                BSONObj t = *i;
+                if ( explode ){
+                    BSONObj it;
+                    if ( Helpers::findById( cc() , _ns.c_str() , t, it ) ){
+                        arr.append( it );
+                        size += it.objsize();
+                    }
+                }
+                else {
+                    arr.append( t );
+                }
+                i = l->erase( i );
+                size += t.objsize();
+            }
+
+            arr.done();
+        }
+
+        bool transferMods( string& errmsg , BSONObjBuilder& b ){
+            if ( ! _active ){
+                errmsg = "no active migration!";
+                return false;
+            }
+
+            long long size = 0;
+
+            {
+                readlock rl( _ns );
+                Client::Context cx( _ns );
+
+                scoped_lock lk( _mutex );
+                xfer( &_deleted , b , "deleted" , size , false );
+                xfer( &_reload , b , "reload" , size , true );
+            }
+
+            b.append( "size" , size );
+
+            return true;
+        }
+
+        bool _inCriticalSection;
+
+    private:
+
+        bool _active;
+
+        string _ns;
+        BSONObj _min;
+        BSONObj _max;
+
+        list<BSONObj> _reload;
+        list<BSONObj> _deleted;
+
+        mongo::mutex _mutex;
+
+    } migrateFromStatus;
+
+    struct MigrateStatusHolder {
+        MigrateStatusHolder( string ns , const BSONObj& min , const BSONObj& max ){
+            migrateFromStatus.start( ns , min , max );
+        }
+        ~MigrateStatusHolder(){
+            migrateFromStatus.done();
+        }
+    };
+
+    void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ){
+        migrateFromStatus.logOp( opstr , ns , obj , patt );
+    }
+
+    class TransferModsCommand : public ChunkCommandHelper{
+    public:
+        TransferModsCommand() : ChunkCommandHelper( "_transferMods" ){}
+
+        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
+            return migrateFromStatus.transferMods( errmsg, result );
+        }
+    } transferModsCommand;
+
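The _deleted/_reload split above is the heart of the incremental sync: while a chunk migrates, the donor records only _id values; deletes are replayed by id, and updates/inserts ("reload") are re-fetched from the collection so the recipient always sees the latest version of each document, with batches capped at roughly 1MB as in xfer(). A simplified standalone sketch of the batching side (types and the byte accounting are stand-ins):

    #include <cstddef>
    #include <list>
    #include <string>
    #include <vector>

    const std::size_t kMaxBatchBytes = 1024 * 1024; // mirrors xfer()'s maxSize

    // Drain up to kMaxBatchBytes worth of recorded ids from 'pending' into
    // 'batch'; leftovers stay queued for the next _transferMods round-trip.
    std::size_t drain(std::list<std::string>& pending, std::vector<std::string>& batch) {
        std::size_t size = 0;
        while (!pending.empty() && size < kMaxBatchBytes) {
            size += pending.front().size();
            batch.push_back(std::move(pending.front()));
            pending.pop_front();
        }
        return size; // caller reports this, like the "size" field in transferMods
    }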
+    /**
+     * this is the main entry for moveChunk
+     * called to initiate a move
+     * usually by a mongos
+     * this is called on the "from" side
+     */
+    class MoveChunkCommand : public Command {
+    public:
+        MoveChunkCommand() : Command( "moveChunk" ){}
+        virtual void help( stringstream& help ) const {
+            help << "should not be calling this directly" << endl;
+        }
+
+        virtual bool slaveOk() const { return false; }
+        virtual bool adminOnly() const { return true; }
+        virtual LockType locktype() const { return NONE; }
+
+
+        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
+            // 1. parse options
+            // 2. make sure my view is complete and lock
+            // 3. start migrate
+            // 4. pause till migrate caught up
+            // 5. LOCK
+            //    a) update my config, essentially locking
+            //    b) finish migrate
+            //    c) update config server
+            //    d) logChange to config server
+            // 6. wait for all current cursors to expire
+            // 7. remove data locally
+
+            // -------------------------------
+
+
+            // 1.
+            string ns = cmdObj.firstElement().str();
+            string to = cmdObj["to"].str();
+            string from = cmdObj["from"].str(); // my public address, a tad redundant, but safe
+            BSONObj min = cmdObj["min"].Obj();
+            BSONObj max = cmdObj["max"].Obj();
+            BSONElement shardId = cmdObj["shardId"];
+
+            if ( ns.empty() ){
+                errmsg = "need to specify namespace in command";
+                return false;
+            }
+
+            if ( to.empty() ){
+                errmsg = "need to specify server to move shard to";
+                return false;
+            }
+            if ( from.empty() ){
+                errmsg = "need to specify server to move shard from (redundant, I know)";
+                return false;
+            }
+
+            if ( min.isEmpty() ){
+                errmsg = "need to specify a min";
+                return false;
+            }
+
+            if ( max.isEmpty() ){
+                errmsg = "need to specify a max";
+                return false;
+            }
+
+            if ( shardId.eoo() ){
+                errmsg = "need shardId";
+                return false;
+            }
+
+            if ( ! shardingState.enabled() ){
+                if ( cmdObj["configdb"].type() != String ){
+                    errmsg = "sharding not enabled";
+                    return false;
+                }
+                string configdb = cmdObj["configdb"].String();
+                shardingState.enable( configdb );
+                configServer.init( configdb );
+            }
+
+            MoveTimingHelper timing( "from" , ns );
+
+            Shard fromShard( from );
+            Shard toShard( to );
+
+            log() << "got movechunk: " << cmdObj << endl;
+
+            timing.done(1);
+            // 2.
+
+            DistributedLock lockSetup( ConnectionString( shardingState.getConfigServer() , ConnectionString::SYNC ) , ns );
+            dist_lock_try dlk( &lockSetup , (string)"migrate-" + min.toString() );
+            if ( ! dlk.got() ){
+                errmsg = "someone else has the lock";
+                result.append( "who" , dlk.other() );
+                return false;
+            }
+
+            ShardChunkVersion maxVersion;
+            string myOldShard;
+            {
+                ScopedDbConnection conn( shardingState.getConfigServer() );
+
+                BSONObj x = conn->findOne( ShardNS::chunk , Query( BSON( "ns" << ns ) ).sort( BSON( "lastmod" << -1 ) ) );
+                maxVersion = x["lastmod"];
+
+                x = conn->findOne( ShardNS::chunk , shardId.wrap( "_id" ) );
+                assert( x["shard"].type() );
+                myOldShard = x["shard"].String();
+
+                if ( myOldShard != fromShard.getName() ){
+                    errmsg = "i'm out of date";
+                    result.append( "from" , fromShard.getName() );
+                    result.append( "official" , myOldShard );
+                    return false;
+                }
+
+                if ( maxVersion < shardingState.getVersion( ns ) ){
+                    errmsg = "official version less than mine?";
+                    result.appendTimestamp( "officialVersion" , maxVersion );
+                    result.appendTimestamp( "myVersion" , shardingState.getVersion( ns ) );
+                    return false;
+                }
+
+                conn.done();
+            }
+
+            timing.done(2);
+
+            // 3.
+            MigrateStatusHolder statusHolder( ns , min , max );
+            {
+                dblock lk;
+                // this makes sure there wasn't a write inside the .cpp code we can miss
+            }
+
+            {
+
+                ScopedDbConnection conn( to );
+                BSONObj res;
+                bool ok = conn->runCommand( "admin" ,
+                                            BSON( "_recvChunkStart" << ns <<
+                                                  "from" << from <<
+                                                  "min" << min <<
+                                                  "max" << max <<
+                                                  "configServer" << configServer.modelServer()
+                                                  ) ,
+                                            res );
+                conn.done();
+
+                if ( ! ok ){
+                    errmsg = "_recvChunkStart failed: ";
+                    assert( res["errmsg"].type() );
+                    errmsg += res["errmsg"].String();
+                    result.append( "cause" , res );
+                    return false;
+                }
+
+            }
+            timing.done( 3 );
+
+            // 4.
+ for ( int i=0; i<86400; i++ ){ // don't want a single chunk move to take more than a day + assert( dbMutex.getState() == 0 ); + sleepsecs( 1 ); + ScopedDbConnection conn( to ); + BSONObj res; + bool ok = conn->runCommand( "admin" , BSON( "_recvChunkStatus" << 1 ) , res ); + res = res.getOwned(); + conn.done(); + + log(0) << "_recvChunkStatus : " << res << endl; + + if ( ! ok || res["state"].String() == "fail" ){ + log( LL_ERROR ) << "_recvChunkStatus error : " << res << endl; + errmsg = "_recvChunkStatus error"; + result.append( "cause" ,res ); + return false; + } + + if ( res["state"].String() == "steady" ) + break; + + killCurrentOp.checkForInterrupt(); + } + timing.done(4); + + // 5. + { + // 5.a + migrateFromStatus._inCriticalSection = true; + ShardChunkVersion myVersion = maxVersion; + myVersion.incMajor(); + + { + dblock lk; + assert( myVersion > shardingState.getVersion( ns ) ); + shardingState.setVersion( ns , myVersion ); + assert( myVersion == shardingState.getVersion( ns ) ); + log() << "moveChunk locking myself to: " << myVersion << endl; + } + + + // 5.b + { + BSONObj res; + ScopedDbConnection conn( to ); + bool ok = conn->runCommand( "admin" , + BSON( "_recvChunkCommit" << 1 ) , + res ); + conn.done(); + log() << "moveChunk commit result: " << res << endl; + if ( ! ok ){ + log() << "_recvChunkCommit failed: " << res << endl; + errmsg = "_recvChunkCommit failed!"; + result.append( "cause" , res ); + return false; + } + } + + // 5.c + ScopedDbConnection conn( shardingState.getConfigServer() ); + + BSONObjBuilder temp; + temp.append( "shard" , toShard.getName() ); + temp.appendTimestamp( "lastmod" , myVersion ); + + conn->update( ShardNS::chunk , shardId.wrap( "_id" ) , BSON( "$set" << temp.obj() ) ); + + { + // update another random chunk + BSONObj x = conn->findOne( ShardNS::chunk , Query( BSON( "ns" << ns << "shard" << myOldShard ) ).sort( BSON( "lastmod" << -1 ) ) ); + if ( ! x.isEmpty() ){ + + BSONObjBuilder temp2; + myVersion.incMinor(); + + temp2.appendTimestamp( "lastmod" , myVersion ); + + shardingState.setVersion( ns , myVersion ); + + conn->update( ShardNS::chunk , x["_id"].wrap() , BSON( "$set" << temp2.obj() ) ); + + log() << "moveChunk updating self to: " << myVersion << endl; + } + else { + //++myVersion; + shardingState.setVersion( ns , 0 ); + + log() << "moveChunk now i'm empty" << endl; + } + } + + conn.done(); + migrateFromStatus._inCriticalSection = false; + // 5.d + configServer.logChange( "moveChunk" , ns , BSON( "min" << min << "max" << max << + "from" << fromShard.getName() << + "to" << toShard.getName() ) ); + } + + migrateFromStatus.done(); + timing.done(5); + + + { // 6. + OldDataCleanup c; + c.ns = ns; + c.min = min.getOwned(); + c.max = max.getOwned(); + ClientCursor::find( ns , c.initial ); + if ( c.initial.size() ){ + log() << "forking for cleaning up chunk data" << endl; + boost::thread t( boost::bind( &cleanupOldData , c ) ); + } + else { + log() << "doing delete inline" << endl; + // 7. + c.doRemove(); + } + + + } + timing.done(6); + + return true; + + } + + } moveChunkCmd; + + bool ShardingState::inCriticalMigrateSection(){ + return migrateFromStatus._inCriticalSection; + } + + /* ----- + below this are the "to" side commands + + command to initiate + worker thread + does initial clone + pulls initial change set + keeps pulling + keeps state + command to get state + commend to "commit" + */ + + class MigrateStatus { + public: + + MigrateStatus(){ + active = false; + } + + void prepare(){ + assert( ! 
active );
+            state = READY;
+            errmsg = "";
+
+            numCloned = 0;
+            numCatchup = 0;
+            numSteady = 0;
+
+            active = true;
+        }
+
+        void go(){
+            try {
+                _go();
+            }
+            catch ( std::exception& e ){
+                state = FAIL;
+                errmsg = e.what();
+                log( LL_ERROR ) << "migrate failed: " << e.what() << endl;
+            }
+            catch ( ... ){
+                state = FAIL;
+                errmsg = "UNKNOWN ERROR";
+                log( LL_ERROR ) << "migrate failed with unknown exception" << endl;
+            }
+            active = false;
+        }
+
+        void _go(){
+            MoveTimingHelper timing( "to" , ns );
+
+            assert( active );
+            assert( state == READY );
+            assert( ! min.isEmpty() );
+            assert( ! max.isEmpty() );
+
+            ScopedDbConnection conn( from );
+            conn->getLastError(); // just test connection
+
+            { // 1. copy indexes
+                auto_ptr<DBClientCursor> indexes = conn->getIndexes( ns );
+                vector<BSONObj> all;
+                while ( indexes->more() ){
+                    all.push_back( indexes->next().getOwned() );
+                }
+
+                writelock lk( ns );
+                Client::Context ct( ns );
+
+                string system_indexes = cc().database()->name + ".system.indexes";
+                for ( unsigned i=0; i cursor = conn->query( ns , Query().minKey( min ).maxKey( max ) , /* QueryOption_Exhaust */ 0 );
+                assert( cursor.get() );
+                while ( cursor->more() ){
+                    BSONObj o = cursor->next().getOwned();
+                    {
+                        writelock lk( ns );
+                        Helpers::upsert( ns , o );
+                    }
+                    numCloned++;
+                }
+
+                timing.done(3);
+            }
+
+            { // 4. do bulk of mods
+                state = CATCHUP;
+                while ( true ){
+                    BSONObj res;
+                    if ( ! conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ){
+                        state = FAIL;
+                        errmsg = "_transferMods failed: ";
+                        errmsg += res.toString();
+                        log( LL_ERROR ) << "_transferMods failed: " << res << endl;
+                        conn.done();
+                        return;
+                    }
+                    if ( res["size"].number() == 0 )
+                        break;
+
+                    apply( res );
+                }
+
+                timing.done(4);
+            }
+
+            { // 5. wait for commit
+                state = STEADY;
+                while ( state == STEADY || state == COMMIT_START ){
+                    BSONObj res;
+                    if ( ! conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ){
+                        log() << "_transferMods failed in STEADY state: " << res << endl;
+                        errmsg = res.toString();
+                        state = FAIL;
+                        conn.done();
+                        return;
+                    }
+
+                    if ( res["size"].number() > 0 && apply( res ) )
+                        continue;
+
+                    if ( state == COMMIT_START )
+                        break;
+
+                    sleepmillis( 10 );
+                }
+
+                timing.done(5);
+            }
+
+            state = DONE;
+            conn.done();
+        }
+
+        void status( BSONObjBuilder& b ){
+            b.appendBool( "active" , active );
+
+            b.append( "ns" , ns );
+            b.append( "from" , from );
+            b.append( "min" , min );
+            b.append( "max" , max );
+
+            b.append( "state" , stateString() );
+            if ( state == FAIL )
+                b.append( "errmsg" , errmsg );
+            {
+                BSONObjBuilder bb( b.subobjStart( "counts" ) );
+                bb.append( "cloned" , numCloned );
+                bb.append( "catchup" , numCatchup );
+                bb.append( "steady" , numSteady );
+                bb.done();
+            }
+
+
+        }
+
+        bool apply( const BSONObj& xfer ){
+            bool didAnything = false;
+
+            if ( xfer["deleted"].isABSONObj() ){
+                writelock lk(ns);
+                Client::Context cx(ns);
+
+                RemoveSaver rs( "moveChunk" , ns , "removedDuring" );
+
+                BSONObjIterator i( xfer["deleted"].Obj() );
+                while ( i.more() ){
+                    BSONObj id = i.next().Obj();
+                    Helpers::removeRange( ns , id , id, false , true , cmdLine.moveParanoia ?
&rs : 0 ); + didAnything = true; + } + } + + if ( xfer["reload"].isABSONObj() ){ + writelock lk(ns); + Client::Context cx(ns); + + BSONObjIterator i( xfer["reload"].Obj() ); + while ( i.more() ){ + BSONObj it = i.next().Obj(); + Helpers::upsert( ns , it ); + didAnything = true; + } + } + + return didAnything; + } + + string stateString(){ + switch ( state ){ + case READY: return "ready"; + case CLONE: return "clone"; + case CATCHUP: return "catchup"; + case STEADY: return "steady"; + case COMMIT_START: return "commitStart"; + case DONE: return "done"; + case FAIL: return "fail"; + } + assert(0); + return ""; + } + + bool startCommit(){ + if ( state != STEADY ) + return false; + state = COMMIT_START; + + for ( int i=0; i<86400; i++ ){ + sleepmillis(1); + if ( state == DONE ) + return true; + } + log() << "startCommit never finished!" << endl; + return false; + } + + bool active; + + string ns; + string from; + + BSONObj min; + BSONObj max; + + long long numCloned; + long long numCatchup; + long long numSteady; + + enum State { READY , CLONE , CATCHUP , STEADY , COMMIT_START , DONE , FAIL } state; + string errmsg; + + } migrateStatus; + + void migrateThread(){ + Client::initThread( "migrateThread" ); + migrateStatus.go(); + cc().shutdown(); + } + + class RecvChunkStartCommand : public ChunkCommandHelper { + public: + RecvChunkStartCommand() : ChunkCommandHelper( "_recvChunkStart" ){} + + virtual LockType locktype() const { return WRITE; } // this is so don't have to do locking internally + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + if ( migrateStatus.active ){ + errmsg = "migrate already in progress"; + return false; + } + + if ( ! configServer.ok() ) + configServer.init( cmdObj["configServer"].String() ); + + migrateStatus.prepare(); + + migrateStatus.ns = cmdObj.firstElement().String(); + migrateStatus.from = cmdObj["from"].String(); + migrateStatus.min = cmdObj["min"].Obj().getOwned(); + migrateStatus.max = cmdObj["max"].Obj().getOwned(); + + boost::thread m( migrateThread ); + + result.appendBool( "started" , true ); + return true; + } + + } recvChunkStartCmd; + + class RecvChunkStatusCommand : public ChunkCommandHelper { + public: + RecvChunkStatusCommand() : ChunkCommandHelper( "_recvChunkStatus" ){} + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + migrateStatus.status( result ); + return 1; + } + + } recvChunkStatusCommand; + + class RecvChunkCommitCommand : public ChunkCommandHelper { + public: + RecvChunkCommitCommand() : ChunkCommandHelper( "_recvChunkCommit" ){} + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool ok = migrateStatus.startCommit(); + migrateStatus.status( result ); + return ok; + } + + } recvChunkCommitCommand; + + + class IsInRangeTest : public UnitTest { + public: + void run(){ + BSONObj min = BSON( "x" << 1 ); + BSONObj max = BSON( "x" << 5 ); + + assert( ! isInRange( BSON( "x" << 0 ) , min , max ) ); + assert( isInRange( BSON( "x" << 1 ) , min , max ) ); + assert( isInRange( BSON( "x" << 3 ) , min , max ) ); + assert( isInRange( BSON( "x" << 4 ) , min , max ) ); + assert( ! isInRange( BSON( "x" << 5 ) , min , max ) ); + assert( ! 
isInRange( BSON( "x" << 6 ) , min , max ) );
+        }
+    } isInRangeTest;
+}
diff -Nru mongodb-1.4.4/s/d_split.cpp mongodb-1.6.3/s/d_split.cpp
--- mongodb-1.4.4/s/d_split.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/s/d_split.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,211 @@
+// d_split.cpp
+
+/**
+*    Copyright (C) 2008 10gen Inc.
+*
+*    This program is free software: you can redistribute it and/or modify
+*    it under the terms of the GNU Affero General Public License, version 3,
+*    as published by the Free Software Foundation.
+*
+*    This program is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*    GNU Affero General Public License for more details.
+*
+*    You should have received a copy of the GNU Affero General Public License
+*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include <map>
+#include <string>
+
+#include "../db/btree.h"
+#include "../db/commands.h"
+#include "../db/dbmessage.h"
+#include "../db/jsobj.h"
+#include "../db/query.h"
+#include "../db/queryoptimizer.h"
+
+namespace mongo {
+
+    // TODO: Fold these checks into each command.
+    static IndexDetails *cmdIndexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) {
+        if ( ns[ 0 ] == '\0' || min.isEmpty() || max.isEmpty() ) {
+            errmsg = "invalid command syntax (note: min and max are required)";
+            return 0;
+        }
+        return indexDetailsForRange( ns, errmsg, min, max, keyPattern );
+    }
+
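The "} isInRangeTest;" and "} cmdMedianKey;" endings throughout this diff are the same idiom: a class whose single static instance registers itself in a global table from its constructor, so merely linking the file installs the command or test. A minimal sketch of that self-registration pattern (Command and PingCommand here are stand-ins, not the mongo classes):

    #include <iostream>
    #include <map>
    #include <string>

    class Command {
    public:
        explicit Command(const char* name) { registry()[name] = this; }
        virtual ~Command() = default;
        virtual bool run() = 0;

        static std::map<std::string, Command*>& registry() {
            static std::map<std::string, Command*> r; // construct-on-first-use
            return r;
        }
    };

    class PingCommand : public Command {
    public:
        PingCommand() : Command("ping") {}
        bool run() override { std::cout << "pong\n"; return true; }
    } pingCommand; // static instance: registration happens at program startup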
+
+    class CmdMedianKey : public Command {
+    public:
+        CmdMedianKey() : Command( "medianKey" ) {}
+        virtual bool slaveOk() const { return true; }
+        virtual LockType locktype() const { return READ; }
+        virtual void help( stringstream &help ) const {
+            help <<
+                "Internal command.\n"
+                "example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n"
+                "NOTE: This command may take a while to run";
+        }
+        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
+            const char *ns = jsobj.getStringField( "medianKey" );
+            BSONObj min = jsobj.getObjectField( "min" );
+            BSONObj max = jsobj.getObjectField( "max" );
+            BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+
+            Client::Context ctx( ns );
+
+            IndexDetails *id = cmdIndexDetailsForRange( ns, errmsg, min, max, keyPattern );
+            if ( id == 0 )
+                return false;
+
+            Timer timer;
+            int num = 0;
+            NamespaceDetails *d = nsdetails(ns);
+            int idxNo = d->idxNo(*id);
+
+            // only yielding on first half for now
+            // after this it should be in ram, so 2nd should be fast
+            {
+                shared_ptr<Cursor> c( new BtreeCursor( d, idxNo, *id, min, max, false, 1 ) );
+                scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
+                while ( c->ok() ){
+                    num++;
+                    c->advance();
+                    if ( ! cc->yieldSometimes() )
+                        break;
+                }
+            }
+
+            num /= 2;
+
+            BtreeCursor c( d, idxNo, *id, min, max, false, 1 );
+            for( ; num; c.advance(), --num );
+
+            ostringstream os;
+            os << "Finding median for index: " << keyPattern << " between " << min << " and " << max;
+            logIfSlow( timer , os.str() );
+
+            if ( !c.ok() ) {
+                errmsg = "no index entries in the specified range";
+                return false;
+            }
+
+            BSONObj median = c.prettyKey( c.currKey() );
+            result.append( "median", median );
+
+            int x = median.woCompare( min , BSONObj() , false );
+            int y = median.woCompare( max , BSONObj() , false );
+            if ( x == 0 || y == 0 ){
+                // it's on an edge, ok
+            }
+            else if ( x < 0 && y < 0 ){
+                log( LL_ERROR ) << "median error (1) min: " << min << " max: " << max << " median: " << median << endl;
+                errmsg = "median error 1";
+                return false;
+            }
+            else if ( x > 0 && y > 0 ){
+                log( LL_ERROR ) << "median error (2) min: " << min << " max: " << max << " median: " << median << endl;
+                errmsg = "median error 2";
+                return false;
+            }
+
+            return true;
+        }
+    } cmdMedianKey;
+
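medianKey is two passes over the same index range: pass one counts entries in [min, max) (yielding periodically so it doesn't hog the read lock), pass two advances half that many entries and reads the key found there. The same structure sketched against a sorted vector instead of a BtreeCursor (illustrative only):

    #include <cstddef>
    #include <vector>

    template <typename T>
    const T* medianByCounting(const std::vector<T>& sortedRange) {
        if (sortedRange.empty())
            return nullptr;           // "no index entries in the specified range"
        std::size_t num = 0;
        for (std::size_t i = 0; i < sortedRange.size(); ++i)
            ++num;                    // pass 1: count (the real code can yield here)
        num /= 2;
        return &sortedRange[num];     // pass 2: advance num entries, read the key
    }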
+    class SplitVector : public Command {
+    public:
+        SplitVector() : Command( "splitVector" , false ){}
+        virtual bool slaveOk() const { return false; }
+        virtual LockType locktype() const { return READ; }
+        virtual void help( stringstream &help ) const {
+            help <<
+                "Internal command.\n"
+                "example: { splitVector : \"myLargeCollection\" , keyPattern : {x:1} , maxChunkSize : 200 }\n"
+                "maxChunkSize unit in MBs\n"
+                "NOTE: This command may take a while to run";
+        }
+        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
+            const char* ns = jsobj.getStringField( "splitVector" );
+            BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+
+            long long maxChunkSize = 0;
+            BSONElement maxSizeElem = jsobj[ "maxChunkSize" ];
+            if ( ! maxSizeElem.eoo() ){
+                maxChunkSize = maxSizeElem.numberLong() * 1<<20;
+            } else {
+                errmsg = "need to specify the desired max chunk size";
+                return false;
+            }
+
+            Client::Context ctx( ns );
+
+            BSONObjBuilder minBuilder;
+            BSONObjBuilder maxBuilder;
+            BSONForEach(key, keyPattern){
+                minBuilder.appendMinKey( key.fieldName() );
+                maxBuilder.appendMaxKey( key.fieldName() );
+            }
+            BSONObj min = minBuilder.obj();
+            BSONObj max = maxBuilder.obj();
+
+            IndexDetails *idx = cmdIndexDetailsForRange( ns , errmsg , min , max , keyPattern );
+            if ( idx == NULL ){
+                errmsg = "couldn't find index over splitting key";
+                return false;
+            }
+
+            NamespaceDetails *d = nsdetails( ns );
+            BtreeCursor c( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
+
+            // We'll use the average object size and number of objects to find approximately how many keys
+            // each chunk should have. We'll split a little smaller than specified by 'maxSize'
+            // assuming a recently sharded collection is still going to grow.
+
+            const long long dataSize = d->datasize;
+            const long long recCount = d->nrecords;
+            long long keyCount = 0;
+            if (( dataSize > 0 ) && ( recCount > 0 )){
+                const long long avgRecSize = dataSize / recCount;
+                keyCount = 90 * maxChunkSize / (100 * avgRecSize);
+            }
+
+            // We traverse the index and add the keyCount-th key to the result vector. If that key
+            // appeared in the vector before, we omit it. The assumption here is that all the
+            // instances of a key value live in the same chunk.
+
+            Timer timer;
+            long long currCount = 0;
+            vector<BSONObj> splitKeys;
+            BSONObj currKey;
+            while ( c.ok() ){
+                currCount++;
+                if ( currCount > keyCount ){
+                    if ( ! currKey.isEmpty() && (currKey.woCompare( c.currKey() ) == 0 ) )
+                        continue;
+
+                    currKey = c.currKey();
+                    splitKeys.push_back( c.prettyKey( currKey ) );
+                    currCount = 0;
+                }
+                c.advance();
+            }
+
+            ostringstream os;
+            os << "Finding the split vector for " << ns << " over "<< keyPattern;
+            logIfSlow( timer , os.str() );
+
+            // Warning: we are sending back an array of keys but are currently limited to
+            // 4MB worth of 'result' size. This should be okay for now.
+
+            result.append( "splitKeys" , splitKeys );
+            return true;
+
+        }
+    } cmdSplitVector;
+
+} // namespace mongo
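A quick worked example of the keyCount heuristic above, with assumed numbers that are not taken from the diff (200MB max chunk size, 512-byte average documents):

    #include <iostream>

    int main() {
        const long long maxChunkSize = 200LL * (1 << 20);     // 200MB in bytes
        const long long dataSize = 10LL * 1024 * 1024 * 1024; // 10GB collection
        const long long recCount = dataSize / 512;            // => 512B average docs

        const long long avgRecSize = dataSize / recCount;     // 512
        // The 90/100 factor aims for 90% of the max so a growing chunk has headroom.
        const long long keyCount = 90 * maxChunkSize / (100 * avgRecSize);
        std::cout << keyCount << "\n"; // 368640 index keys between split points
        return 0;
    }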
void ShardingState::setVersion( const string& ns , const ConfigVersion& version ){ + scoped_lock lk(_mutex); + ConfigVersion& me = _versions[ns]; + assert( version == 0 || version > me ); + me = version; + } + + void ShardingState::appendInfo( BSONObjBuilder& b ){ + b.appendBool( "enabled" , _enabled ); + if ( ! _enabled ) + return; + + b.append( "configServer" , _configServer ); + b.append( "shardName" , _shardName ); + b.append( "shardHost" , _shardHost ); + + { + BSONObjBuilder bb( b.subobjStart( "versions" ) ); + + scoped_lock lk(_mutex); + for ( NSVersionMap::iterator i=_versions.begin(); i!=_versions.end(); ++i ){ + bb.appendTimestamp( i->first.c_str() , i->second ); + } + bb.done(); + } + + } + + ChunkMatcherPtr ShardingState::getChunkMatcher( const string& ns ){ + if ( ! _enabled ) + return ChunkMatcherPtr(); + + if ( ! ShardedConnectionInfo::get( false ) ) + return ChunkMatcherPtr(); + + ConfigVersion version; + { + scoped_lock lk( _mutex ); + version = _versions[ns]; + + if ( ! version ) + return ChunkMatcherPtr(); + + ChunkMatcherPtr p = _chunks[ns]; + if ( p && p->_version >= version ) + return p; + } + + BSONObj q; + { + BSONObjBuilder b; + b.append( "ns" , ns.c_str() ); + b.append( "shard" , BSON( "$in" << BSON_ARRAY( _shardHost << _shardName ) ) ); + q = b.obj(); + } + + auto_ptr<ScopedDbConnection> scoped; + auto_ptr<DBDirectClient> direct; + + DBClientBase * conn; + + if ( _configServer == _shardHost ){ + direct.reset( new DBDirectClient() ); + conn = direct.get(); + } + else { + scoped.reset( new ScopedDbConnection( _configServer ) ); + conn = scoped->get(); + } + + auto_ptr<DBClientCursor> cursor = conn->query( "config.chunks" , Query(q).sort( "min" ) ); + assert( cursor.get() ); + if ( ! cursor->more() ){ + if ( scoped.get() ) + scoped->done(); + return ChunkMatcherPtr(); + } + + ChunkMatcherPtr p( new ChunkMatcher( version ) ); + + BSONObj min,max; + while ( cursor->more() ){ + BSONObj d = cursor->next(); + + if ( min.isEmpty() ){ + min = d["min"].Obj().getOwned(); + max = d["max"].Obj().getOwned(); + continue; + } + + if ( max == d["min"].Obj() ){ + max = d["max"].Obj().getOwned(); + continue; + } + + p->gotRange( min.getOwned() , max.getOwned() ); + min = d["min"].Obj().getOwned(); + max = d["max"].Obj().getOwned(); + } + assert( ! min.isEmpty() ); + p->gotRange( min.getOwned() , max.getOwned() ); + + if ( scoped.get() ) + scoped->done(); + + { + scoped_lock lk( _mutex ); + _chunks[ns] = p; + } + + return p; + } + + ShardingState shardingState; + + // -----ShardingState END ----
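/* [editor's note: illustrative sketch, not part of the patch. getChunkMatcher above
   walks this shard's chunks sorted by 'min' and coalesces them: whenever one chunk's
   max equals the next chunk's min, the two collapse into a single owned range before
   being handed to ChunkMatcher::gotRange. The same merging logic on plain integer
   intervals: */

#include <iostream>
#include <utility>
#include <vector>

int main() {
    // chunks owned by this shard, sorted by min: [0,10) [10,20) [35,40)
    std::pair<int,int> chunks[] = { std::make_pair(0,10), std::make_pair(10,20), std::make_pair(35,40) };
    std::vector< std::pair<int,int> > ranges;

    int mn = chunks[0].first, mx = chunks[0].second;
    for ( int i = 1; i < 3; i++ ) {
        if ( chunks[i].first == mx ) { mx = chunks[i].second; continue; }  // contiguous: extend
        ranges.push_back( std::make_pair(mn,mx) );                         // gap: close the range
        mn = chunks[i].first; mx = chunks[i].second;
    }
    ranges.push_back( std::make_pair(mn,mx) );

    for ( size_t i = 0; i < ranges.size(); i++ )
        std::cout << "[" << ranges[i].first << "," << ranges[i].second << ")\n"; // [0,20) then [35,40)
    return 0;
}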
// -----ShardedConnectionInfo START ---- + + boost::thread_specific_ptr<ShardedConnectionInfo> ShardedConnectionInfo::_tl; + + ShardedConnectionInfo::ShardedConnectionInfo(){ + _forceMode = false; + _id.clear(); + } + + ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ){ + ShardedConnectionInfo* info = _tl.get(); + if ( ! info && create ){ + log(1) << "entering shard mode for connection" << endl; + info = new ShardedConnectionInfo(); + _tl.reset( info ); + } + return info; + } + + void ShardedConnectionInfo::reset(){ + _tl.reset(); + } + + ConfigVersion& ShardedConnectionInfo::getVersion( const string& ns ){ + return _versions[ns]; + } + + void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ){ + _versions[ns] = version; + } + + void ShardedConnectionInfo::setID( const OID& id ){ + _id = id; + } + + // -----ShardedConnectionInfo END ---- + + unsigned long long extractVersion( BSONElement e , string& errmsg ){ + if ( e.eoo() ){ + errmsg = "no version"; + return 0; + } + + if ( e.isNumber() ) + return (unsigned long long)e.number(); + + if ( e.type() == Date || e.type() == Timestamp ) + return e._numberLong(); + + + errmsg = "version is not a numeric type"; + return 0; + } + + class MongodShardCommand : public Command { + public: + MongodShardCommand( const char * n ) : Command( n ){ + } + virtual bool slaveOk() const { + return false; + } + virtual bool adminOnly() const { + return true; + } + }; + + + bool haveLocalShardingInfo( const string& ns ){ + if ( ! shardingState.enabled() ) + return false; + + if ( ! shardingState.hasVersion( ns ) ) + return false; + + return ShardedConnectionInfo::get(false) != 0; + } + + class UnsetShardingCommand : public MongodShardCommand { + public: + UnsetShardingCommand() : MongodShardCommand("unsetSharding"){} + + virtual void help( stringstream& help ) const { + help << " example: { unsetSharding : 1 } "; + } + + virtual LockType locktype() const { return NONE; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + ShardedConnectionInfo::reset(); + return true; + } + + } unsetShardingCommand;
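/* [editor's note: illustrative sketch, not part of the patch. extractVersion above
   accepts the version either as a plain number or as a Date/Timestamp element read
   back as a 64-bit integer. A shard version packs a major component (bumped by a
   migrate) and a minor component (bumped by a split) into one unsigned long long;
   the field layout is assumed here from ShardChunkVersion in s/util.h of this
   release. That packing is why plain integer comparison orders versions correctly: */

#include <iostream>

int main() {
    unsigned long long combined = (3ULL << 32) | 7ULL;          // major 3, minor 7
    unsigned int major = (unsigned int)( combined >> 32 );
    unsigned int minor = (unsigned int)( combined & 0xffffffffULL );
    std::cout << major << "|" << minor << "\n";                 // prints 3|7

    unsigned long long afterSplit = (3ULL << 32) | 8ULL;        // a split bumps minor only
    std::cout << ( afterSplit > combined ) << "\n";             // prints 1
    return 0;
}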
class SetShardVersion : public MongodShardCommand { + public: + SetShardVersion() : MongodShardCommand("setShardVersion"){} + + virtual void help( stringstream& help ) const { + help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } "; + } + + virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + lastError.disableForCommand(); + ShardedConnectionInfo* info = ShardedConnectionInfo::get( true ); + + bool authoritative = cmdObj.getBoolField( "authoritative" ); + + string configdb = cmdObj["configdb"].valuestrsafe(); + { // configdb checking + if ( configdb.size() == 0 ){ + errmsg = "no configdb"; + return false; + } + + if ( shardingState.enabled() ){ + if ( configdb != shardingState.getConfigServer() ){ + errmsg = "specified a different configdb!"; + return false; + } + } + else { + if ( ! authoritative ){ + result.appendBool( "need_authoritative" , true ); + errmsg = "first setShardVersion"; + return false; + } + shardingState.enable( configdb ); + configServer.init( configdb ); + } + } + + if ( cmdObj["shard"].type() == String ){ + shardingState.gotShardName( cmdObj["shard"].String() ); + shardingState.gotShardHost( cmdObj["shardHost"].String() ); + } + + { // setting up ids + if ( cmdObj["serverID"].type() != jstOID ){ + // TODO: fix this + //errmsg = "need serverID to be an OID"; + //return 0; + } + else { + OID clientId = cmdObj["serverID"].__oid(); + if ( ! info->hasID() ){ + info->setID( clientId ); + } + else if ( clientId != info->getID() ){ + errmsg = "server id has changed!"; + return 0; + } + } + } + + unsigned long long version = extractVersion( cmdObj["version"] , errmsg ); + + if ( errmsg.size() ){ + return false; + } + + string ns = cmdObj["setShardVersion"].valuestrsafe(); + if ( ns.size() == 0 ){ + errmsg = "need to specify a fully qualified namespace"; + return false; + } + + ConfigVersion& oldVersion = info->getVersion(ns); + ConfigVersion& globalVersion = shardingState.getVersion(ns); + + if ( oldVersion > 0 && globalVersion == 0 ){ + // this had been reset + oldVersion = 0; + } + + if ( version == 0 && globalVersion == 0 ){ + // this connection is cleaning itself + oldVersion = 0; + return 1; + } + + if ( version == 0 && globalVersion > 0 ){ + if ( ! authoritative ){ + result.appendBool( "need_authoritative" , true ); + result.appendTimestamp( "globalVersion" , globalVersion ); + result.appendTimestamp( "oldVersion" , oldVersion ); + errmsg = "dropping needs to be authoritative"; + return 0; + } + log() << "wiping data for: " << ns << endl; + result.appendTimestamp( "beforeDrop" , globalVersion ); + // only setting global version on purpose + // need clients to re-find meta-data + globalVersion = 0; + oldVersion = 0; + return 1; + } + + if ( version < oldVersion ){ + errmsg = "you already have a newer version"; + result.appendTimestamp( "oldVersion" , oldVersion ); + result.appendTimestamp( "newVersion" , version ); + result.appendTimestamp( "globalVersion" , globalVersion ); + return false; + } + + if ( version < globalVersion ){ + while ( shardingState.inCriticalMigrateSection() ){ + dbtemprelease r; + sleepmillis(2); + log() << "waiting till out of critical section" << endl; + } + errmsg = "going to older version for global"; + result.appendTimestamp( "version" , version ); + result.appendTimestamp( "globalVersion" , globalVersion ); + return false; + } + + if ( globalVersion == 0 && !
cmdObj.getBoolField( "authoritative" ) ){ + // need authoritative for first look + result.appendBool( "need_authoritative" , true ); + result.append( "ns" , ns ); + errmsg = "first time for this ns"; + return false; + } + + { + dbtemprelease unlock; + shardingState.getChunkMatcher( ns ); + } + + result.appendTimestamp( "oldVersion" , oldVersion ); + oldVersion = version; + globalVersion = version; + + result.append( "ok" , 1 ); + return 1; + } + + } setShardVersion; + + class GetShardVersion : public MongodShardCommand { + public: + GetShardVersion() : MongodShardCommand("getShardVersion"){} + + virtual void help( stringstream& help ) const { + help << " example: { getShardVersion : 'alleyinsider.foo' } "; + } + + virtual LockType locktype() const { return NONE; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + string ns = cmdObj["getShardVersion"].valuestrsafe(); + if ( ns.size() == 0 ){ + errmsg = "need to specify a fully qualified namespace"; + return false; + } + + result.append( "configServer" , shardingState.getConfigServer() ); + + result.appendTimestamp( "global" , shardingState.getVersion(ns) ); + + ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); + if ( info ) + result.appendTimestamp( "mine" , info->getVersion(ns) ); + else + result.appendTimestamp( "mine" , 0 ); + + return true; + } + + } getShardVersion; + + class ShardingStateCmd : public MongodShardCommand { + public: + ShardingStateCmd() : MongodShardCommand( "shardingState" ){} + + virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + shardingState.appendInfo( result ); + return true; + } + + } shardingStateCmd; + + /** + * @return true if not in sharded mode + or if version for this client is ok + */ + bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ){ + if ( ! shardingState.enabled() ) + return true; + + ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); + + if ( ! info ){ + // this means the client has nothing sharded + // so this allows direct connections to do whatever they want + // which I think is the correct behavior + return true; + } + + if ( info->inForceMode() ){ + return true; + } + + ConfigVersion version; + if ( !
shardingState.hasVersion( ns , version ) ){ + return true; + } + + ConfigVersion clientVersion = info->getVersion(ns); + + if ( version == 0 && clientVersion > 0 ){ + stringstream ss; + ss << "collection was dropped or this shard is no longer valid. version: " << version << " clientVersion: " << clientVersion; + errmsg = ss.str(); + return false; + } + + if ( clientVersion >= version ) + return true; + + + if ( clientVersion == 0 ){ + stringstream ss; + ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version; + errmsg = ss.str(); + return false; + } + + if ( isWriteOp && version.majorVersion() == clientVersion.majorVersion() ){ + // this means there was just a split + // since on a split w/o a migrate this server is ok + // going to accept write + return true; + } + + stringstream ss; + ss << "your version is too old ns: " + ns << " global: " << version << " client: " << clientVersion; + errmsg = ss.str(); + return false; + } + + // --- ChunkMatcher --- + + ChunkMatcher::ChunkMatcher( ConfigVersion version ) + : _version( version ){ + + } + + void ChunkMatcher::gotRange( const BSONObj& min , const BSONObj& max ){ + if (_key.isEmpty()){ + BSONObjBuilder b; + + BSONForEach(e, min) { + b.append(e.fieldName(), 1); + } + + _key = b.obj(); + } + + //TODO debug mode only? + assert(min.nFields() == _key.nFields()); + assert(max.nFields() == _key.nFields()); + + _map[min] = make_pair(min,max); + } + + bool ChunkMatcher::belongsToMe( const BSONObj& key , const DiskLoc& loc ) const { + if ( _map.size() == 0 ) + return false; + + BSONObj x = loc.obj().extractFields(_key); + + MyMap::const_iterator a = _map.upper_bound( x ); + if ( a != _map.begin() ) + a--; + + bool good = x.woCompare( a->second.first ) >= 0 && x.woCompare( a->second.second ) < 0; +#if 0 + if ( ! good ){ + cout << "bad: " << x << "\t" << a->second.first << "\t" << x.woCompare( a->second.first ) << "\t" << x.woCompare( a->second.second ) << endl; + for ( MyMap::const_iterator i=_map.begin(); i!=_map.end(); ++i ){ + cout << "\t" << i->first << "\t" << i->second.first << "\t" << i->second.second << endl; + } + } +#endif + return good; + } + +}
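/* [editor's note: illustrative sketch, not part of the patch. belongsToMe above
   locates the candidate range with map::upper_bound plus one step back: upper_bound
   returns the first range whose min is strictly greater than the key, so the entry
   before it is the only one that can contain the key. The same lookup over integer
   ranges keyed by their min: */

#include <iostream>
#include <map>
#include <utility>

typedef std::map< int, std::pair<int,int> > RangeMap;   // min -> [min, max)

bool belongsToMe( const RangeMap& m , int x ) {
    if ( m.empty() )
        return false;
    RangeMap::const_iterator a = m.upper_bound( x );
    if ( a != m.begin() )
        --a;                                            // step back to the candidate
    return x >= a->second.first && x < a->second.second;
}

int main() {
    RangeMap m;
    m[0]  = std::make_pair( 0 , 20 );
    m[35] = std::make_pair( 35 , 40 );
    std::cout << belongsToMe( m , 5 ) << belongsToMe( m , 25 ) << belongsToMe( m , 35 ) << "\n"; // 101
    return 0;
}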
diff -Nru mongodb-1.4.4/s/d_util.cpp mongodb-1.6.3/s/d_util.cpp --- mongodb-1.4.4/s/d_util.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/d_util.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -22,14 +22,19 @@ mostly around shard management and checking */ -#include "stdafx.h" +#include "pch.h" #include "util.h" using namespace std; namespace mongo { - void checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative ){ + bool checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative , int tryNumber ){ + // no-op in mongod + return false; + } + + void resetShardVersion( DBClientBase * conn ){ // no-op in mongod } diff -Nru mongodb-1.4.4/s/d_writeback.cpp mongodb-1.6.3/s/d_writeback.cpp --- mongodb-1.4.4/s/d_writeback.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/d_writeback.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,80 @@ +// d_writeback.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" + +#include "../db/commands.h" +#include "../db/jsobj.h" +#include "../db/dbmessage.h" +#include "../db/query.h" + +#include "../client/connpool.h" + +#include "../util/queue.h" + +#include "shard.h" + +using namespace std; + +namespace mongo { + + map< string , BlockingQueue<BSONObj>* > writebackQueue; + mongo::mutex writebackQueueLock("sharding:writebackQueueLock"); + + BlockingQueue<BSONObj>* getWritebackQueue( const string& remote ){ + scoped_lock lk (writebackQueueLock ); + BlockingQueue<BSONObj>*& q = writebackQueue[remote]; + if ( ! q ) + q = new BlockingQueue<BSONObj>(); + return q; + } + + void queueWriteBack( const string& remote , const BSONObj& o ){ + getWritebackQueue( remote )->push( o ); + } + + // Note, this command will block until there is something to WriteBack + class WriteBackCommand : public Command { + public: + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + + WriteBackCommand() : Command( "writebacklisten" ){} + + void help(stringstream& h) const { h<<"internal"; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + BSONElement e = cmdObj.firstElement(); + if ( e.type() != jstOID ){ + errmsg = "need oid as first value"; + return 0; + } + + const OID id = e.__oid(); + BSONObj z = getWritebackQueue(id.str())->blockingPop(); + log(1) << "WriteBackCommand got : " << z << endl; + + result.append( "data" , z ); + + return true; + } + } writeBackCommand; + +}
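/* [editor's note: illustrative sketch, not part of the patch. The writeback path
   above is a blocking producer/consumer handoff: mongod queues documents per mongos
   (keyed by its serverID), and each mongos parks a 'writebacklisten' command that
   pops one entry, blocking until the queue is non-empty. A minimal blocking queue of
   the kind this file assumes: */

#include <queue>
#include <boost/thread/condition.hpp>
#include <boost/thread/mutex.hpp>

template <typename T>
class SimpleBlockingQueue {
public:
    void push( const T& t ) {
        boost::mutex::scoped_lock lk(_m);
        _q.push( t );
        _c.notify_one();                // wake one blocked consumer
    }

    T blockingPop() {
        boost::mutex::scoped_lock lk(_m);
        while ( _q.empty() )
            _c.wait( lk );              // releases _m while sleeping
        T t = _q.front();
        _q.pop();
        return t;
    }

private:
    std::queue<T> _q;
    boost::mutex _m;
    boost::condition _c;
};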
diff -Nru mongodb-1.4.4/s/grid.cpp mongodb-1.6.3/s/grid.cpp --- mongodb-1.4.4/s/grid.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/grid.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,361 @@ +// grid.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" + +#include + +#include "../client/connpool.h" +#include "../util/stringutils.h" + +#include "grid.h" +#include "shard.h" + +namespace mongo { + + DBConfigPtr Grid::getDBConfig( string database , bool create , const string& shardNameHint ){ + { + string::size_type i = database.find( "." ); + if ( i != string::npos ) + database = database.substr( 0 , i ); + } + + if ( database == "config" ) + return configServerPtr; + + scoped_lock l( _lock ); + + DBConfigPtr& cc = _databases[database]; + if ( !cc ){ + cc.reset(new DBConfig( database )); + if ( ! cc->load() ){ + if ( create ){ + // note here that cc->primary == 0. + log() << "couldn't find database [" << database << "] in config db" << endl; + + { // let's check the case + ScopedDbConnection conn( configServer.modelServer() ); + BSONObjBuilder b; + b.appendRegex( "_id" , (string)"^" + database + "$" , "i" ); + BSONObj d = conn->findOne( ShardNS::database , b.obj() ); + conn.done(); + + if ( ! d.isEmpty() ){ + cc.reset(); + stringstream ss; + ss << "can't have 2 databases that just differ on case " + << " have: " << d["_id"].String() + << " want to add: " << database; + + uasserted( DatabaseDifferCaseCode ,ss.str() ); + } + } + + Shard primary; + if ( database == "admin" ){ + primary = configServer.getPrimary(); + + } else if ( shardNameHint.empty() ){ + primary = Shard::pick(); + + } else { + // use the shard name if provided + Shard shard; + shard.reset( shardNameHint ); + primary = shard; + } + + if ( primary.ok() ){ + cc->setPrimary( primary.getName() ); // saves 'cc' to configDB + log() << "\t put [" << database << "] on: " << primary << endl; + } + else { + cc.reset(); + log() << "\t can't find a shard to put new db on" << endl; + uasserted( 10185 , "can't find a shard to put new db on" ); + } + } + else { + cc.reset(); + } + } + + } + + return cc; + } + + void Grid::removeDB( string database ){ + uassert( 10186 , "removeDB expects db name" , database.find( '.' ) == string::npos ); + scoped_lock l( _lock ); + _databases.erase( database ); + + } + + bool Grid::allowLocalHost() const { + return _allowLocalShard; + } + + void Grid::setAllowLocalHost( bool allow ){ + _allowLocalShard = allow; + }
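/* [editor's note: illustrative sketch, not part of the patch. The case check above
   builds an anchored, case-insensitive regex over the database _id, so adding "Foo"
   is rejected when "foo" already exists. The same match, standalone via boost: */

#include <iostream>
#include <string>
#include <boost/regex.hpp>

int main() {
    std::string database = "alleyinsider";
    boost::regex r( "^" + database + "$" , boost::regex::icase );         // anchored + 'i' flag
    std::cout << boost::regex_match( std::string("AlleyInsider") , r );   // 1 -> case collision
    std::cout << boost::regex_match( std::string("alleyinsider2") , r );  // 0 -> distinct name
    std::cout << "\n";
    return 0;
}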
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ){ + // name can be NULL, so provide a dummy one here to avoid testing it elsewhere + string nameInternal; + if ( ! name ) { + name = &nameInternal; + } + + // Check whether the host (or set) exists and run several sanity checks on this request. + // There are two sets of sanity checks: making sure adding this particular shard is consistent + // with the replica set state (if it exists) and making sure this shard's databases can be + // brought into the grid without conflict. + + vector<string> dbNames; + try { + ScopedDbConnection newShardConn( servers ); + newShardConn->getLastError(); + + if ( newShardConn->type() == ConnectionString::SYNC ){ + newShardConn.done(); + errMsg = "can't use sync cluster as a shard. for replica set, have to use <setname>/<server1>,<server2>,..."; + return false; + } + + BSONObj resIsMaster; + bool ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster ); + if ( !ok ){ + ostringstream ss; + ss << "failed running isMaster: " << resIsMaster; + errMsg = ss.str(); + newShardConn.done(); + return false; + } + + // if the shard has only one host, make sure it is not part of a replica set + string setName = resIsMaster["setName"].str(); + string commandSetName = servers.getSetName(); + if ( commandSetName.empty() && ! setName.empty() ){ + ostringstream ss; + ss << "host is part of set: " << setName << " use replica set url format <setname>/<server1>,<server2>,...."; + errMsg = ss.str(); + newShardConn.done(); + return false; + } + + // if the shard is part of a replica set, make sure it is the right one + if ( ! commandSetName.empty() && ( commandSetName != setName ) ){ + ostringstream ss; + ss << "host is part of a different set: " << setName; + errMsg = ss.str(); + newShardConn.done(); + return false; + } + + // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of + // the set. It is fine if not all members of the set are present in 'servers'. + bool foundAll = true; + string offendingHost; + if ( ! commandSetName.empty() ){ + set<string> hostSet; + BSONObjIterator iter( resIsMaster["hosts"].Obj() ); + while ( iter.more() ){ + hostSet.insert( iter.next().String() ); // host:port + } + + vector<HostAndPort> hosts = servers.getServers(); + for ( size_t i = 0 ; i < hosts.size() ; i++ ){ + string host = hosts[i].toString(); // host:port + if ( hostSet.find( host ) == hostSet.end() ){ + offendingHost = host; + foundAll = false; + break; + } + } + } + if ( ! foundAll ){ + ostringstream ss; + ss << "host " << offendingHost << " does not belong to replica set " << setName; + errMsg = ss.str(); + newShardConn.done(); + return false; + } + + // shard name defaults to the name of the replica set + if ( name->empty() && ! setName.empty() ) + *name = setName; + + // In order to be accepted as a new shard, that mongod must not have any database name that exists already + // in any other shards. If that test passes, the new shard's databases are going to be entered as + // non-sharded db's whose primary is the newly added shard. + + BSONObj resListDB; + ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB ); + if ( !ok ){ + ostringstream ss; + ss << "failed listing " << servers.toString() << "'s databases:" << resListDB; + errMsg = ss.str(); + newShardConn.done(); + return false; + } + + BSONObjIterator i( resListDB["databases"].Obj() ); + while ( i.more() ){ + BSONObj dbEntry = i.next().Obj(); + const string& dbName = dbEntry["name"].String(); + if ( _isSpecialLocalDB( dbName ) ){ + // 'local', 'admin', and 'config' are system DBs and should be excluded here + continue; + } else { + dbNames.push_back( dbName ); + } + } + + newShardConn.done(); + } + catch ( DBException& e ){ + ostringstream ss; + ss << "couldn't connect to new shard "; + ss << e.what(); + errMsg = ss.str(); + return false; + } + + // check that none of the existing shard candidate's db's exist elsewhere + for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ){ + DBConfigPtr config = getDBConfig( *it , false ); + if ( config.get() != NULL ){ + ostringstream ss; + ss << "can't add shard " << servers.toString() << " because a local database '" << *it; + ss << "' exists in another " << config->getPrimary().toString(); + errMsg = ss.str(); + return false; + } + } + + // if a name for a shard wasn't provided, pick one. + if ( name->empty() && ! _getNewShardName( name ) ){ + errMsg = "error generating new shard name"; + return false; + } + + // build the ConfigDB shard document + BSONObjBuilder b; + b.append( "_id" , *name ); + b.append( "host" , servers.toString() ); + if ( maxSize > 0 ){ + b.append( ShardFields::maxSize.name() , maxSize ); + } + BSONObj shardDoc = b.obj(); + + { + ScopedDbConnection conn( configServer.getPrimary() ); + + // check whether the set of hosts (or single host) is not already a known shard + BSONObj old = conn->findOne( ShardNS::shard , BSON( "host" << servers.toString() ) ); + if ( ! old.isEmpty() ){ + errMsg = "host already used"; + conn.done(); + return false; + } + + log() << "going to add shard: " << shardDoc << endl; + + conn->insert( ShardNS::shard , shardDoc ); + errMsg = conn->getLastError(); + if ( ! errMsg.empty() ){ + log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl; + conn.done(); + return false; + } + + conn.done(); + } + + Shard::reloadShardInfo(); + + // add all databases of the new shard + for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ){ + DBConfigPtr config = getDBConfig( *it , true , *name ); + if ( !
config ){ + log() << "adding shard " << servers << " even though could not add database " << *it << endl; + } + } + + return true; + } + + bool Grid::knowAboutShard( const string& name ) const{ + ShardConnection conn( configServer.getPrimary() , "" ); + BSONObj shard = conn->findOne( ShardNS::shard , BSON( "host" << name ) ); + conn.done(); + return ! shard.isEmpty(); + } + + bool Grid::_getNewShardName( string* name ) const{ + DEV assert( name ); + + bool ok = false; + int count = 0; + + ShardConnection conn( configServer.getPrimary() , "" ); + BSONObj o = conn->findOne( ShardNS::shard , Query( fromjson ( "{_id: /^shard/}" ) ).sort( BSON( "_id" << -1 ) ) ); + if ( ! o.isEmpty() ) { + string last = o["_id"].String(); + istringstream is( last.substr( 5 ) ); + is >> count; + count++; + } + if (count < 9999) { + stringstream ss; + ss << "shard" << setfill('0') << setw(4) << count; + *name = ss.str(); + ok = true; + } + conn.done(); + + return ok; + } + + bool Grid::shouldBalance() const { + ShardConnection conn( configServer.getPrimary() , "" ); + + // look for the stop balancer marker + BSONObj stopMarker = conn->findOne( ShardNS::settings, BSON( "_id" << "balancer" << "stopped" << true ) ); + conn.done(); + return stopMarker.isEmpty(); + } + + unsigned long long Grid::getNextOpTime() const { + ScopedDbConnection conn( configServer.getPrimary() ); + + BSONObj result; + massert( 10421 , "getoptime failed" , conn->simpleCommand( "admin" , &result , "getoptime" ) ); + conn.done(); + + return result["optime"]._numberLong(); + } + + bool Grid::_isSpecialLocalDB( const string& dbName ){ + return ( dbName == "local" ) || ( dbName == "admin" ) || ( dbName == "config" ); + } + + Grid grid; + +}
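/* [editor's note: illustrative sketch, not part of the patch. _getNewShardName above
   sorts the existing "shardNNNN" ids descending, parses the numeric suffix of the
   highest one, and formats the successor zero-padded to four digits. The
   parse/format core in isolation: */

#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

int main() {
    std::string last = "shard0007";            // highest existing _id in config.shards
    int count = 0;
    std::istringstream is( last.substr( 5 ) ); // skip the "shard" prefix
    is >> count;
    count++;

    std::stringstream ss;
    ss << "shard" << std::setfill('0') << std::setw(4) << count;
    std::cout << ss.str() << "\n";             // prints shard0008
    return 0;
}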
diff -Nru mongodb-1.4.4/s/grid.h mongodb-1.6.3/s/grid.h --- mongodb-1.4.4/s/grid.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/grid.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,105 @@ +// grid.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "../util/concurrency/mutex.h" + +#include "config.h" // DBConfigPtr + +namespace mongo { + + /** + * stores meta-information about the grid + * TODO: use shared_ptr for DBConfig pointers + */ + class Grid { + public: + Grid() : _lock( "Grid" ) , _allowLocalShard( true ) { } + + /** + * gets the config for the db. + * will return an empty DBConfig if not in db already + */ + DBConfigPtr getDBConfig( string ns , bool create=true , const string& shardNameHint="" ); + + /** + * removes db entry. + * on next getDBConfig call will fetch from db + */ + void removeDB( string db ); + + /** + * @return true if shards and config servers are allowed to use 'localhost' in address + */ + bool allowLocalHost() const; + + /** + * @param allow whether to allow shards and config servers to use 'localhost' in address + */ + void setAllowLocalHost( bool allow ); + + /** + * + * addShard will create a new shard in the grid. It expects a mongod process to be running + * on the provided address. Adding a shard that is a replica set is supported. + * + * @param name is an optional string with the name of the shard. if omitted, grid will + * generate one and update the parameter. + * @param servers is the connection string of the shard being added + * @param maxSize is the optional space quota in megabytes. Zero means there is no limit on + * space usage + * @param errMsg is the error description in case the operation failed. + * @return true if shard was successfully added. + */ + bool addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ); + + /** + * @return true if the config database knows about a host 'name' + */ + bool knowAboutShard( const string& name ) const; + + /** + * @return true if the chunk balancing functionality is enabled + */ + bool shouldBalance() const; + + unsigned long long getNextOpTime() const; + + private: + mongo::mutex _lock; // protects _databases; TODO: change to r/w lock ?? + map<string,DBConfigPtr> _databases; // maps ns to DBConfig's + bool _allowLocalShard; // can 'localhost' be used in shard addresses? + + /** + * @param name receives the chosen name for the shard. Parameter is mandatory. + * @return true if it managed to generate a shard name. May return false if the + * (currently) 10000 possible shard names are already in use. + */ + bool _getNewShardName( string* name ) const; + + /** + * @return whether a given dbname is used for shard "local" databases (e.g., admin or local) + */ + static bool _isSpecialLocalDB( const string& dbName ); + + }; + + extern Grid grid; + +} // namespace mongo diff -Nru mongodb-1.4.4/s/request.cpp mongodb-1.6.3/s/request.cpp --- mongodb-1.4.4/s/request.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/request.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -19,32 +19,48 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "server.h" + #include "../db/commands.h" #include "../db/dbmessage.h" +#include "../db/stats/counters.h" + #include "../client/connpool.h" #include "request.h" #include "config.h" #include "chunk.h" +#include "stats.h" +#include "cursors.h" +#include "grid.h" namespace mongo { Request::Request( Message& m, AbstractMessagingPort* p ) : - _m(m) , _d( m ) , _p(p){ + _m(m) , _d( m ) , _p(p) , _didInit(false){ assert( _d.getns() ); - _id = _m.data->id; + _id = _m.header()->id; - _clientId = p ? p->remotePort() << 16 : 0; + _clientId = p ? p->getClientId() : 0; _clientInfo = ClientInfo::get( _clientId ); - _clientInfo->newRequest(); + _clientInfo->newRequest( p ); + } + + void Request::init(){ + if ( _didInit ) + return; + _didInit = true; reset(); } - + void Request::reset( bool reload ){ + if ( _m.operation() == dbKillCursors ){ + return; + } + + _config = grid.getDBConfig( getns() ); if ( reload ) uassert( 10192 , "db config reload failed!" 
, _config->reload() ); @@ -54,49 +70,62 @@ uassert( 10193 , (string)"no shard info for: " + getns() , _chunkManager ); } else { - _chunkManager = 0; + _chunkManager.reset(); } - _m.data->id = _id; + _m.header()->id = _id; } - string Request::singleServerName(){ + Shard Request::primaryShard() const { + assert( _didInit ); + if ( _chunkManager ){ if ( _chunkManager->numChunks() > 1 ) - throw UserException( 8060 , "can't call singleServerName on a sharded collection" ); - return _chunkManager->findChunk( _chunkManager->getShardKey().globalMin() ).getShard(); + throw UserException( 8060 , "can't call primaryShard on a sharded collection" ); + return _chunkManager->findChunk( _chunkManager->getShardKey().globalMin() )->getShard(); } - string s = _config->getShard( getns() ); - uassert( 10194 , "can't call singleServerName on a sharded collection!" , s.size() > 0 ); + Shard s = _config->getShard( getns() ); + uassert( 10194 , "can't call primaryShard on a sharded collection!" , s.ok() ); return s; } void Request::process( int attempt ){ - - log(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.data->id) << " attempt: " << attempt << endl; - - int op = _m.data->operation(); + init(); + int op = _m.operation(); assert( op > dbMsg ); + if ( op == dbKillCursors ){ + cursorCache.gotKillCursors( _m ); + return; + } + + + log(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl; + Strategy * s = SINGLE; + _counter = &opsNonSharded; _d.markSet(); - + if ( _chunkManager ){ s = SHARDED; + _counter = &opsSharded; } + bool iscmd = false; if ( op == dbQuery ) { + iscmd = isCommand(); try { s->queryOp( *this ); } catch ( StaleConfigException& staleConfig ){ log() << staleConfig.what() << " attempt: " << attempt << endl; uassert( 10195 , "too many attempts to update config, failing" , attempt < 5 ); - - sleepsecs( attempt ); - reset( true ); + ShardConnection::checkMyConnectionVersions( getns() ); + if (!staleConfig.justConnection() ) + sleepsecs( attempt ); + reset( ! 
staleConfig.justConnection() ); + _d.markReset(); + process( attempt + 1 ); + return; @@ -108,8 +137,29 @@ else { s->writeOp( op, *this ); } + + globalOpCounters.gotOp( op , iscmd ); + _counter->gotOp( op , iscmd ); } + bool Request::isCommand() const { + int x = _d.getQueryNToReturn(); + return ( x == 1 || x == -1 ) && strstr( getns() , ".$cmd" ); + } + + void Request::gotInsert(){ + globalOpCounters.gotInsert(); + _counter->gotInsert(); + } + + void Request::reply( Message & response , const string& fromServer ){ + assert( _didInit ); + long long cursor = response.header()->getCursor(); + if ( cursor ){ + cursorCache.storeRef( fromServer , cursor ); + } + _p->reply( _m , response , _id ); + } ClientInfo::ClientInfo( int clientId ) : _id( clientId ){ _cur = &_a; @@ -118,18 +168,33 @@ } ClientInfo::~ClientInfo(){ - scoped_lock lk( _clientsLock ); - ClientCache::iterator i = _clients.find( _id ); - if ( i != _clients.end() ){ - _clients.erase( i ); + if ( _lastAccess ){ + scoped_lock lk( _clientsLock ); + ClientCache::iterator i = _clients.find( _id ); + if ( i != _clients.end() ){ + _clients.erase( i ); + } } } void ClientInfo::addShard( const string& shard ){ _cur->insert( shard ); + _sinceLastGetError.insert( shard ); } - void ClientInfo::newRequest(){ + void ClientInfo::newRequest( AbstractMessagingPort* p ){ + + if ( p ){ + string r = p->remote().toString(); + if ( _remote == "" ) + _remote = r; + else if ( _remote != r ){ + stringstream ss; + ss << "remotes don't match old [" << _remote << "] new [" << r << "]"; + throw UserException( 13134 , ss.str() ); + } + } + + _lastAccess = (int) time(0); + + set<string> * temp = _cur;
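/* [editor's note: illustrative sketch, not part of the patch; the statements that
   rotate the two sets fall between the hunks shown here. ClientInfo keeps two shard
   sets (_a/_b): the shards touched by the current request and by the previous one,
   so getLastError can still reach the shards of the request that just finished. The
   double-buffer rotation it relies on looks like: */

#include <set>
#include <string>

struct ShardSets {
    std::set<std::string> _a, _b;
    std::set<std::string> *_cur, *_prev;

    ShardSets() : _cur(&_a), _prev(&_b) {}

    void newRequest() {
        std::set<std::string>* temp = _cur;
        _cur = _prev;        // start writing into the other buffer...
        _prev = temp;        // ...while the last request's shards stay readable
        _cur->clear();
    }
};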
@@ -168,8 +233,23 @@ return info; } - map<int,ClientInfo*> ClientInfo::_clients; - mongo::mutex ClientInfo::_clientsLock; + void ClientInfo::disconnect( int clientId ){ + if ( ! clientId ) + return; + + scoped_lock lk( _clientsLock ); + ClientCache::iterator i = _clients.find( clientId ); + if ( i == _clients.end() ) + return; + + ClientInfo* ci = i->second; + ci->disconnect(); + delete ci; + _clients.erase( i ); + } + + ClientCache& ClientInfo::_clients = *(new ClientCache()); + mongo::mutex ClientInfo::_clientsLock("_clientsLock"); + boost::thread_specific_ptr<ClientInfo> ClientInfo::_tlInfo; } // namespace mongo diff -Nru mongodb-1.4.4/s/request.h mongodb-1.6.3/s/request.h --- mongodb-1.4.4/s/request.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/request.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,14 +18,16 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/message.h" #include "../db/dbmessage.h" #include "config.h" #include "util.h" namespace mongo { + + class OpCounters; class ClientInfo; class Request : boost::noncopyable { @@ -33,75 +35,80 @@ Request( Message& m, AbstractMessagingPort* p ); // ---- message info ----- + - - const char * getns(){ + const char * getns() const { return _d.getns(); } - int op(){ - return _m.data->operation(); + int op() const { + return _m.operation(); } - bool expectResponse(){ + bool expectResponse() const { return op() == dbQuery || op() == dbGetMore; } - - MSGID id(){ + bool isCommand() const; + + MSGID id() const { return _id; } - DBConfig * getConfig(){ + DBConfigPtr getConfig() const { + assert( _didInit ); return _config; } - bool isShardingEnabled(){ + bool isShardingEnabled() const { + assert( _didInit ); return _config->isShardingEnabled(); } - ChunkManager * getChunkManager(){ + ChunkManagerPtr getChunkManager() const { + assert( _didInit ); return _chunkManager; } - int getClientId(){ + int getClientId() const { return _clientId; } - ClientInfo * getClientInfo(){ + ClientInfo * getClientInfo() const { return _clientInfo; } // ---- remote location info ----- - - string singleServerName(); - const char * primaryName(){ - return _config->getPrimary().c_str(); - } - + Shard primaryShard() const ; + // ---- low level access ---- - void reply( Message & response ){ - _p->reply( _m , response , _id ); - } + void reply( Message & response , const string& fromServer ); - Message& m(){ return _m; } - DbMessage& d(){ return _d; } - AbstractMessagingPort* p(){ return _p; } + Message& m() { return _m; } + DbMessage& d() { return _d; } + AbstractMessagingPort* p() const { return _p; } void process( int attempt = 0 ); - - private: - + + void gotInsert(); + + void init(); + + void reset( bool reload=false ); + + private: Message& _m; DbMessage _d; AbstractMessagingPort* _p; MSGID _id; - DBConfig * _config; - ChunkManager * _chunkManager; + DBConfigPtr _config; + ChunkManagerPtr _chunkManager; int _clientId; ClientInfo * _clientInfo; + + OpCounters* _counter; + + bool _didInit; }; typedef map<int,ClientInfo*> ClientCache; @@ -111,24 +118,36 @@ ClientInfo( int clientId ); ~ClientInfo(); + string getRemote() const { return _remote; } + void addShard( const string& shard ); set<string> * getPrev() const { return _prev; }; - void newRequest(); + void newRequest( AbstractMessagingPort* p = 0 ); void disconnect(); - + static ClientInfo * get( int clientId = 0 , bool create = true ); + static void disconnect( int clientId ); + const set<string>& sinceLastGetError() const { return _sinceLastGetError; } + void clearSinceLastGetError(){ + _sinceLastGetError.clear(); + } + private: int _id; + string _remote; + set<string> _a; set<string> _b; set<string> * _cur; set<string> * _prev; int _lastAccess; + set<string> _sinceLastGetError; + static mongo::mutex _clientsLock; - static ClientCache _clients;
+ static ClientCache& _clients; static boost::thread_specific_ptr<ClientInfo> _tlInfo; }; } diff -Nru mongodb-1.4.4/s/server.cpp mongodb-1.6.3/s/server.cpp --- mongodb-1.4.4/s/server.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/server.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,26 +16,30 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "stdafx.h" +#include "pch.h" #include "../util/message.h" #include "../util/unittest.h" #include "../client/connpool.h" #include "../util/message_server.h" +#include "../util/stringutils.h" +#include "../util/version.h" +#include "../db/dbwebserver.h" #include "server.h" #include "request.h" #include "config.h" #include "chunk.h" +#include "balance.h" +#include "grid.h" +#include "cursors.h" namespace mongo { - + CmdLine cmdLine; Database *database = 0; string mongosCommand; - string ourHostname; - OID serverID; bool dbexitCalled = false; - + bool inShutdown(){ return dbexitCalled; } @@ -59,9 +63,7 @@ class ShardingConnectionHook : public DBConnectionHook { public: - virtual void onCreate( DBClientBase * conn ){ - conn->simpleCommand( "admin" , 0 , "switchtoclienterrors" ); - } + virtual void onHandedOut( DBClientBase * conn ){ ClientInfo::get()->addShard( conn->getServerAddress() ); } @@ -70,47 +72,68 @@ class ShardedMessageHandler : public MessageHandler { public: virtual ~ShardedMessageHandler(){} + virtual void process( Message& m , AbstractMessagingPort* p ){ + assert( p ); Request r( m , p ); + + LastError * le = lastError.startRequest( m , r.getClientId() ); + assert( le ); + if ( logLevel > 5 ){ log(5) << "client id: " << hex << r.getClientId() << "\t" << r.getns() << "\t" << dec << r.op() << endl; } try { + r.init(); setClientId( r.getClientId() ); r.process(); } catch ( DBException& e ){ - m.data->id = r.id(); - log() << "UserException: " << e.what() << endl; + log() << "DBException in process: " << e.what() << endl; + + le->raiseError( e.getCode() , e.what() ); + + m.header()->id = r.id(); + if ( r.expectResponse() ){ - BSONObj err = BSON( "$err" << e.what() ); - replyToQuery( QueryResult::ResultFlag_ErrSet, p , m , err ); + BSONObj err = BSON( "$err" << e.what() << "code" << e.getCode() ); + replyToQuery( ResultFlag_ErrSet, p , m , err ); } } } + + virtual void disconnected( AbstractMessagingPort* p ){ + ClientInfo::disconnect( p->getClientId() ); + lastError.disconnect( p->getClientId() ); + } }; void sighandler(int sig){ - dbexit(EXIT_CLEAN, (string("recieved signal ") + BSONObjBuilder::numStr(sig)).c_str()); + dbexit(EXIT_CLEAN, (string("received signal ") + BSONObjBuilder::numStr(sig)).c_str()); } void setupSignals(){ - // needed for cmdLine, btu we do it in init() + signal(SIGTERM, sighandler); + signal(SIGINT, sighandler); } void init(){ serverID.init(); setupSIGTRAPforGDB(); - signal(SIGTERM, sighandler); - signal(SIGINT, sighandler); + setupCoreSignals(); + setupSignals(); } - void start() { + void start( const MessageServer::Options& opts ){ + balancer.go(); + cursorCache.startTimeoutThread(); + log() << "waiting for connections on port " << cmdLine.port << endl; //DbGridListener l(port); //l.listen(); ShardedMessageHandler handler; - MessageServer * server = createServer( cmdLine.port , &handler ); + MessageServer * server = createServer( opts , &handler ); + server->setAsTimeTracker(); server->run(); } @@ -120,7 +143,7 @@ } void printShardingVersionInfo(){ - log() << mongosCommand << " v0.3 (alpha 3) starting (--help for usage)" << endl; + log() << mongosCommand << " " << mongodVersion() << " starting (--help for usage)" << endl;
printGitVersion(); printSysInfo(); } @@ -146,8 +169,11 @@ options.add_options() ( "configdb" , po::value<string>() , "1 or 3 comma separated config servers" ) ( "test" , "just run unit tests" ) + ( "upgrade" , "upgrade meta data version" ) + ( "chunkSize" , po::value<int>(), "maximum amount of data per chunk" ) + ( "ipv6", "enable IPv6 support (disabled by default)" ) ; - + // parse options po::variables_map params; @@ -164,6 +190,13 @@ return 0; } + if ( params.count( "chunkSize" ) ){ + Chunk::MaxChunkSize = params["chunkSize"].as<int>() * 1024 * 1024; + } + + if ( params.count( "ipv6" ) ){ + enableIPv6(); + } if ( params.count( "test" ) ){ logLevel = 5; @@ -178,23 +211,33 @@ } vector<string> configdbs; - { - string s = params["configdb"].as<string>(); - while ( true ){ - size_t idx = s.find( ',' ); - if ( idx == string::npos ){ - configdbs.push_back( s ); - break; - } - configdbs.push_back( s.substr( 0 , idx ) ); - s = s.substr( idx + 1 ); - } - } - + splitStringDelim( params["configdb"].as<string>() , &configdbs , ',' ); if ( configdbs.size() != 1 && configdbs.size() != 3 ){ out() << "need either 1 or 3 configdbs" << endl; return 5; } + + // either all of the config processes are on localhost or none of them are + for ( vector<string>::const_iterator it = configdbs.begin() ; it != configdbs.end() ; ++it ){ + try { + + HostAndPort configAddr( *it ); // will throw if address format is invalid + + if ( it == configdbs.begin() ){ + grid.setAllowLocalHost( configAddr.isLocalHost() ); + } + + if ( configAddr.isLocalHost() != grid.allowLocalHost() ){ + out() << "cannot mix localhost and ip addresses in configdbs" << endl; + return 10; + } + + } + catch ( DBException& e) { + out() << "configdb: " << e.what() << endl; + return 9; + } + } pool.addHook( &shardingConnectionHook ); @@ -213,24 +256,36 @@ printShardingVersionInfo(); if ( ! configServer.init( configdbs ) ){ - cout << "couldn't connectd to config db" << endl; + cout << "couldn't resolve config db address" << endl; return 7; } - if ( ! configServer.ok() ){ + if ( ! configServer.ok( true ) ){ cout << "configServer startup check failed" << endl; return 8; } - int configError = configServer.checkConfigVersion(); + int configError = configServer.checkConfigVersion( params.count( "upgrade" ) ); if ( configError ){ - cout << "config server error: " << configError << endl; + if ( configError > 0 ){ + cout << "upgrade success!" << endl; + } + else { + cout << "config server error: " << configError << endl; + } return configError; } configServer.reloadSettings(); - + init(); - start(); + + boost::thread web( webServerThread ); + + MessageServer::Options opts; + opts.port = cmdLine.port; + opts.ipList = cmdLine.bind_ip; + start(opts); + dbexit( EXIT_CLEAN ); return 0; } diff -Nru mongodb-1.4.4/s/server.h mongodb-1.6.3/s/server.h --- mongodb-1.4.4/s/server.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/server.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,7 +22,6 @@ namespace mongo { - extern std::string ourHostname; extern OID serverID; // from request.cpp diff -Nru mongodb-1.4.4/s/shardconnection.cpp mongodb-1.6.3/s/shardconnection.cpp --- mongodb-1.4.4/s/shardconnection.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/shardconnection.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,279 @@ +// shardconnection.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation.
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "shard.h" +#include "config.h" +#include "request.h" +#include + +namespace mongo { + + /** + * holds all the actual db connections for a client to various servers + * 1 per thread, so we don't have to worry about thread safety + */ + class ClientConnections : boost::noncopyable { + public: + struct Status : boost::noncopyable { + Status() : created(0), avail(0){} + + long long created; + DBClientBase* avail; + }; + + + Nullstream& debug( Status * s = 0 , const string& addr = "" ){ + static int ll = 9; + + if ( logLevel < ll ) + return nullstream; + Nullstream& l = log(ll); + + l << "ClientConnections DEBUG " << this << " "; + if ( s ){ + l << "s: " << s << " addr: " << addr << " "; + } + return l; + } + + ClientConnections() : _mutex("ClientConnections") { + debug() << " NEW " << endl; + } + + ~ClientConnections(){ + debug() << " KILLING " << endl; + for ( map<string,Status*>::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ + string addr = i->first; + Status* ss = i->second; + assert( ss ); + if ( ss->avail ){ + /* if we're shutting down, don't want to initiate release mechanism as it is slow, + and isn't needed since all connections will be closed anyway */ + if ( inShutdown() ) + delete ss->avail; + else + release( addr , ss->avail ); + ss->avail = 0; + } + delete ss; + } + _hosts.clear(); + } + + DBClientBase * get( const string& addr , const string& ns ){ + _check( ns ); + scoped_lock lk( _mutex ); + Status* &s = _hosts[addr]; + if ( ! s ) + s = new Status(); + + debug( s , addr ) << "WANT ONE pool avail: " << s->avail << endl; + + if ( s->avail ){ + DBClientBase* c = s->avail; + s->avail = 0; + debug( s , addr ) << "GOT " << c << endl; + pool.onHandedOut( c ); + return c; + } + + debug() << "CREATING NEW CONNECTION" << endl; + s->created++; + return pool.get( addr ); + } + + void done( const string& addr , DBClientBase* conn ){ + scoped_lock lk( _mutex ); + Status* s = _hosts[addr]; + assert( s ); + if ( s->avail ){ + debug( s , addr ) << "DONE WITH TEMP" << endl; + release( addr , conn ); + return; + } + s->avail = conn; + debug( s , addr ) << "PUSHING: " << conn << endl; + } + + void sync(){ + scoped_lock lk( _mutex ); + for ( map<string,Status*>::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ + string addr = i->first; + Status* ss = i->second; + + if ( ss->avail ){ + ss->avail->getLastError(); + release( addr , ss->avail ); + ss->avail = 0; + } + delete ss; + } + _hosts.clear(); + } + + void checkVersions( const string& ns ){ + vector<Shard> all; + Shard::getAllShards( all ); + scoped_lock lk( _mutex ); + for ( unsigned i=0; i<all.size(); i++ ){ + Status* &s = _hosts[all[i].getConnString()]; + if ( ! s ) + s = new Status(); + } + + for ( map<string,Status*>::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ + if ( ! Shard::isAShard( i->first ) ) + continue; + Status* ss = i->second; + assert( ss ); + if ( !
ss->avail ) + ss->avail = pool.get( i->first ); + checkShardVersion( *ss->avail , ns ); + } + } + + void release( const string& addr , DBClientBase * conn ){ + resetShardVersion( conn ); + BSONObj res; + + try { + if ( conn->simpleCommand( "admin" , &res , "unsetSharding" ) ){ + pool.release( addr , conn ); + } + else { + log(LL_ERROR) << " couldn't unset sharding :( " << res << endl; + delete conn; + } + } + catch ( std::exception& e ){ + log(LL_ERROR) << "couldn't unset sharding : " << e.what() << endl; + delete conn; + } + } + + void _check( const string& ns ){ + if ( ns.size() == 0 || _seenNS.count( ns ) ) + return; + _seenNS.insert( ns ); + checkVersions( ns ); + } + + map<string,Status*> _hosts; + mongo::mutex _mutex; + set<string> _seenNS; + // ----- + + static thread_specific_ptr<ClientConnections> _perThread; + + static ClientConnections* get(){ + ClientConnections* cc = _perThread.get(); + if ( ! cc ){ + cc = new ClientConnections(); + _perThread.reset( cc ); + } + return cc; + } + }; + + thread_specific_ptr<ClientConnections> ClientConnections::_perThread; + + ShardConnection::ShardConnection( const Shard * s , const string& ns ) + : _addr( s->getConnString() ) , _ns( ns ) { + _init(); + } + + ShardConnection::ShardConnection( const Shard& s , const string& ns ) + : _addr( s.getConnString() ) , _ns( ns ) { + _init(); + } + + ShardConnection::ShardConnection( const string& addr , const string& ns ) + : _addr( addr ) , _ns( ns ) { + _init(); + } + + void ShardConnection::_init(){ + assert( _addr.size() ); + _conn = ClientConnections::get()->get( _addr , _ns ); + _finishedInit = false; + } + + void ShardConnection::_finishInit(){ + if ( _finishedInit ) + return; + _finishedInit = true; + + if ( _ns.size() ){ + _setVersion = checkShardVersion( *_conn , _ns ); + } + else { + _setVersion = false; + } + + } + + void ShardConnection::done(){ + if ( _conn ){ + ClientConnections::get()->done( _addr , _conn ); + _conn = 0; + _finishedInit = true; + } + } + + void ShardConnection::kill(){ + if ( _conn ){ + delete _conn; + _conn = 0; + _finishedInit = true; + } + } + + void ShardConnection::sync(){ + ClientConnections::get()->sync(); + } + + bool ShardConnection::runCommand( const string& db , const BSONObj& cmd , BSONObj& res ){ + assert( _conn ); + bool ok = _conn->runCommand( db , cmd , res ); + if ( ! ok ){ + if ( res["code"].numberInt() == StaleConfigInContextCode ){ + string big = res["errmsg"].String(); + string ns,raw; + massert( 13409 , (string)"can't parse ns from: " + big , StaleConfigException::parse( big , ns , raw ) ); + done(); + throw StaleConfigException( ns , raw ); + } + } + return ok; + } + + void ShardConnection::checkMyConnectionVersions( const string & ns ){ + ClientConnections::get()->checkVersions( ns ); + } + + ShardConnection::~ShardConnection() { + if ( _conn ){ + if ( ! _conn->isFailed() ) { + /* see done() comments above for why we log this line */ + log() << "~ScopedDBConnection: _conn != null" << endl; + } + kill(); + } + } +}
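/* [editor's note: illustrative sketch, not part of the patch. ClientConnections
   above caches at most one idle connection per host, per thread (it lives behind a
   thread_specific_ptr), so a get()/done()/get() sequence on one thread hands back
   the same object without touching the global pool. The caching core, reduced to
   heap-allocated strings standing in for connections: */

#include <iostream>
#include <map>
#include <string>

class PerThreadCache {                      // one instance per thread in the real code
public:
    std::string* get( const std::string& addr ) {
        std::string*& avail = _hosts[addr];
        if ( avail ) { std::string* c = avail; avail = 0; return c; }  // reuse parked conn
        return new std::string( "conn:" + addr );                      // else create anew
    }

    void done( const std::string& addr , std::string* conn ) {
        std::string*& avail = _hosts[addr];
        if ( avail ) { delete conn; return; }  // slot taken: drop the extra (the real code releases it)
        avail = conn;                          // park for the next get()
    }

private:
    std::map<std::string, std::string*> _hosts;
};

int main() {
    PerThreadCache cache;
    std::string* c1 = cache.get( "shardA:27018" );
    cache.done( "shardA:27018" , c1 );
    std::string* c2 = cache.get( "shardA:27018" );
    std::cout << ( c1 == c2 ) << "\n";          // prints 1: same connection reused
    delete c2;
    return 0;
}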
diff -Nru mongodb-1.4.4/s/shard.cpp mongodb-1.6.3/s/shard.cpp --- mongodb-1.4.4/s/shard.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/shard.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,250 @@ +// shard.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "shard.h" +#include "config.h" +#include "request.h" +#include + +namespace mongo { + + class StaticShardInfo { + public: + StaticShardInfo() : _mutex("StaticShardInfo") { } + void reload(){ + + list<BSONObj> all; + { + ScopedDbConnection conn( configServer.getPrimary() ); + auto_ptr<DBClientCursor> c = conn->query( ShardNS::shard , Query() ); + assert( c.get() ); + while ( c->more() ){ + all.push_back( c->next().getOwned() ); + } + conn.done(); + } + + scoped_lock lk( _mutex ); + + // We use the _lookup table for all shards and for the primary config DB. The config DB info, + // however, does not come from the ShardNS::shard. So when cleaning the _lookup table we leave + // the config state intact. The rationale is that this way we could drop shards that + // were removed without reinitializing the config DB information. + + map<string,Shard>::iterator i = _lookup.find( "config" ); + if ( i != _lookup.end() ){ + Shard config = i->second; + _lookup.clear(); + _lookup[ "config" ] = config; + } else { + _lookup.clear(); + } + + for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); ++i ){ + BSONObj o = *i; + string name = o["_id"].String(); + string host = o["host"].String(); + + long long maxSize = 0; + BSONElement maxSizeElem = o[ ShardFields::maxSize.name() ]; + if ( ! maxSizeElem.eoo() ){ + maxSize = maxSizeElem.numberLong(); + } + + bool isDraining = false; + BSONElement isDrainingElem = o[ ShardFields::draining.name() ]; + if ( !
isDrainingElem.eoo() ){ + isDraining = isDrainingElem.Bool(); + } + + Shard s( name , host , maxSize , isDraining ); + _lookup[name] = s; + _lookup[host] = s; + + // add rs name to lookup (if it exists) + size_t pos; + if ((pos = host.find('/', 0)) != string::npos) { + _lookup[host.substr(0, pos)] = s; + } + } + + } + + bool isMember( const string& addr ){ + scoped_lock lk( _mutex ); + map<string,Shard>::iterator i = _lookup.find( addr ); + return i != _lookup.end(); + } + + const Shard& find( const string& ident ){ + { + scoped_lock lk( _mutex ); + map<string,Shard>::iterator i = _lookup.find( ident ); + + // if normal find didn't find anything, try to find by rs name + size_t pos; + if ( i == _lookup.end() && (pos = ident.find('/', 0)) != string::npos) { + i = _lookup.find( ident.substr(0, pos) ); + } + + if ( i != _lookup.end() ) + return i->second; + } + + // not in our maps, re-load all + reload(); + + scoped_lock lk( _mutex ); + map<string,Shard>::iterator i = _lookup.find( ident ); + uassert( 13129 , (string)"can't find shard for: " + ident , i != _lookup.end() ); + return i->second; + } + + void set( const string& name , const string& addr , bool setName = true , bool setAddr = true ){ + Shard s(name,addr); + scoped_lock lk( _mutex ); + if ( setName ) + _lookup[name] = s; + if ( setAddr ) + _lookup[addr] = s; + } + + void remove( const string& name ){ + scoped_lock lk( _mutex ); + for ( map<string,Shard>::iterator i = _lookup.begin(); i!=_lookup.end(); ){ + Shard s = i->second; + if ( s.getName() == name ){ + _lookup.erase(i++); + } else { + ++i; + } + } + } + + void getAllShards( vector<Shard>& all ){ + scoped_lock lk( _mutex ); + std::set<string> seen; + for ( map<string,Shard>::iterator i = _lookup.begin(); i!=_lookup.end(); ++i ){ + Shard s = i->second; + if ( s.getName() == "config" ) + continue; + if ( seen.count( s.getName() ) ) + continue; + seen.insert( s.getName() ); + all.push_back( s ); + } + } + + private: + map<string,Shard> _lookup; + mongo::mutex _mutex; + } staticShardInfo; + + void Shard::setAddress( const string& addr , bool authoritative ){ + assert( _name.size() ); + _addr = addr; + if ( authoritative ) + staticShardInfo.set( _name , _addr , true , false ); + } + + void Shard::reset( const string& ident ){ + const Shard& s = staticShardInfo.find( ident ); + uassert( 13128 , (string)"can't find shard for: " + ident , s.ok() ); + _name = s._name; + _addr = s._addr; + _maxSize = s._maxSize; + _isDraining = s._isDraining; + } + + void Shard::getAllShards( vector<Shard>& all ){ + staticShardInfo.getAllShards( all ); + } + + bool Shard::isAShard( const string& ident ){ + return staticShardInfo.isMember( ident ); + } + + void Shard::printShardInfo( ostream& out ){ + vector<Shard> all; + getAllShards( all ); + for ( unsigned i=0; i<all.size(); i++ ) + out << all[i].toString() << "\n"; + out.flush(); + } + + BSONObj Shard::runCommand( const string& db , const BSONObj& cmd ) const { + ScopedDbConnection conn( getConnString() ); + BSONObj res; + bool ok = conn->runCommand( db , cmd , res ); + if ( !
ok ){ + stringstream ss; + ss << "runCommand (" << cmd << ") on shard (" << _name << ") failed : " << res; + throw UserException( 13136 , ss.str() ); + } + res = res.getOwned(); + conn.done(); + return res; + } + + ShardStatus Shard::getStatus() const { + return ShardStatus( *this , runCommand( "admin" , BSON( "serverStatus" << 1 ) ) ); + } + + void Shard::reloadShardInfo(){ + staticShardInfo.reload(); + } + + + bool Shard::isMember( const string& addr ){ + return staticShardInfo.isMember( addr ); + } + + void Shard::removeShard( const string& name ){ + staticShardInfo.remove( name ); + } + + Shard Shard::pick(){ + vector<Shard> all; + staticShardInfo.getAllShards( all ); + if ( all.size() == 0 ){ + staticShardInfo.reload(); + staticShardInfo.getAllShards( all ); + if ( all.size() == 0 ) + return EMPTY; + } + + ShardStatus best = all[0].getStatus(); + + for ( size_t i=1; i<all.size(); i++ ){ + ShardStatus t = all[i].getStatus(); + if ( t < best ) + best = t; + } + + return best.shard(); + } + + ShardStatus::ShardStatus( const Shard& shard , const BSONObj& obj ) + : _shard( shard ) { + _mapped = obj.getFieldDotted( "mem.mapped" ).numberLong(); + _writeLock = 0; // TODO + } + + Shard Shard::EMPTY; + +} diff -Nru mongodb-1.4.4/s/shard.h mongodb-1.6.3/s/shard.h --- mongodb-1.4.4/s/shard.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/shard.h 2010-09-24 10:02:42.000000000 -0700 +// shard.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "../pch.h" +#include "../client/connpool.h" + +namespace mongo { + + class ShardConnection; + class ShardStatus; + + class Shard { + public: + Shard() + : _name("") , _addr("") , _maxSize(0) , _isDraining( false ){ + } + + Shard( const string& name , const string& addr, long long maxSize = 0 , bool isDraining = false ) + : _name(name) , _addr( addr ) , _maxSize( maxSize ) , _isDraining( isDraining ){ + } + + Shard( const string& ident ){ + reset( ident ); + } + + Shard( const Shard& other ) + : _name( other._name ) , _addr( other._addr ) , _maxSize( other._maxSize ) , _isDraining( other._isDraining ){ + } + + Shard( const Shard* other ) + : _name( other->_name ) , _addr( other->_addr ), _maxSize( other->_maxSize ) , _isDraining( other->_isDraining ){ + } + + static Shard make( const string& ident ){ + Shard s; + s.reset( ident ); + return s; + } + + static bool isAShard( const string& ident ); + + /** + * @param ident either name or address + */ + void reset( const string& ident ); + + void setAddress( const string& addr , bool authoritative = false ); + + string getName() const { + assert( _name.size() ); + return _name; + } + + string getConnString() const { + assert( _addr.size() ); + return _addr; + } + + long long getMaxSize() const { + return _maxSize; + } + + bool isDraining() const { + return _isDraining; + } + + string toString() const { + return _name + ":" + _addr; + } + + friend ostream& operator << (ostream& out, const Shard& s) { + return (out << s.toString()); + } + + bool operator==( const Shard& s ) const { + bool n = _name == s._name; + bool a = _addr == s._addr; + + assert( n == a ); // names and address are 1 to 1 + return n; + } + + bool operator!=( const Shard& s ) const { + bool n = _name == s._name; + bool a = _addr == s._addr; + return ! ( n && a ); + }
( n && a ); + } + + + bool operator==( const string& s ) const { + return _name == s || _addr == s; + } + + bool operator!=( const string& s ) const { + return _name != s && _addr != s; + } + + bool operator<(const Shard& o) const { + return _name < o._name; + } + + bool ok() const { + return _name.size() > 0 && _addr.size() > 0; + } + + BSONObj runCommand( const string& db , const string& simple ) const { + return runCommand( db , BSON( simple << 1 ) ); + } + BSONObj runCommand( const string& db , const BSONObj& cmd ) const ; + + ShardStatus getStatus() const ; + + static void getAllShards( vector<Shard>& all ); + static void printShardInfo( ostream& out ); + + /** + * picks a Shard to put more load on + */ + static Shard pick(); + + static void reloadShardInfo(); + + static void removeShard( const string& name ); + + static bool isMember( const string& addr ); + + static Shard EMPTY; + + private: + string _name; + string _addr; + long long _maxSize; // in MBytes, 0 is unlimited + bool _isDraining; // shard is currently being removed + }; + + class ShardStatus { + public: + + ShardStatus( const Shard& shard , const BSONObj& obj ); + + friend ostream& operator << (ostream& out, const ShardStatus& s) { + out << s.toString(); + return out; + } + + string toString() const { + stringstream ss; + ss << "shard: " << _shard << " mapped: " << _mapped << " writeLock: " << _writeLock; + return ss.str(); + } + + bool operator<( const ShardStatus& other ) const{ + return _mapped < other._mapped; + } + + Shard shard() const { + return _shard; + } + + long long mapped() const { + return _mapped; + } + + private: + Shard _shard; + long long _mapped; + double _writeLock; + }; + + class ShardConnection : public AScopedConnection { + public: + ShardConnection( const Shard * s , const string& ns ); + ShardConnection( const Shard& s , const string& ns ); + ShardConnection( const string& addr , const string& ns ); + + ~ShardConnection(); + + void done(); + void kill(); + + DBClientBase& conn(){ + _finishInit(); + assert( _conn ); + return *_conn; + } + + DBClientBase* operator->(){ + _finishInit(); + assert( _conn ); + return _conn; + } + + DBClientBase* get(){ + _finishInit(); + assert( _conn ); + return _conn; + } + + string getHost() const { + return _addr; + } + + bool setVersion() { + _finishInit(); + return _setVersion; + } + + static void sync(); + + void donotCheckVersion(){ + _setVersion = false; + _finishedInit = true; + } + + /** + this just passes through except it checks for stale configs + */ + bool runCommand( const string& db , const BSONObj& cmd , BSONObj& res ); + + /** checks all of my thread local connections for the version of this ns */ + static void checkMyConnectionVersions( const string & ns ); + + private: + void _init(); + void _finishInit(); + + bool _finishedInit; + + string _addr; + string _ns; + DBClientBase* _conn; + bool _setVersion; + }; +} diff -Nru mongodb-1.4.4/s/shardkey.cpp mongodb-1.6.3/s/shardkey.cpp --- mongodb-1.4.4/s/shardkey.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/shardkey.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,219 +16,91 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
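/* [editor's sketch, not part of the diff] ShardStatus::operator< above orders shards
   by mapped memory, and Shard::pick() (shard.cpp) keeps the status that compares
   lowest. The selection rule on its own, with a hypothetical Status stand-in: */
#include <cassert>
#include <vector>

struct Status {
    long long mapped;
    bool operator<( const Status& o ) const { return mapped < o.mapped; } // as ShardStatus::operator<
};

static Status pickLeastLoaded( const std::vector<Status>& all ){
    assert( !all.empty() );
    Status best = all[0];
    for ( size_t i = 1; i < all.size(); i++ )   // same linear scan as Shard::pick()
        if ( all[i] < best )
            best = all[i];
    return best;
}
/* [end of sketch] */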
*/ -#include "stdafx.h" +#include "pch.h" #include "chunk.h" #include "../db/jsobj.h" #include "../util/unittest.h" -/** - TODO: this only works with numbers right now - this is very temporary, need to make work with anything -*/ - namespace mongo { - void minForPat(BSONObjBuilder& out, const BSONObj& pat){ - BSONElement e = pat.firstElement(); - if (e.type() == Object){ - BSONObjBuilder sub; - minForPat(sub, e.embeddedObject()); - out.append(e.fieldName(), sub.obj()); - } else { - out.appendMinKey(e.fieldName()); - } - } - - void maxForPat(BSONObjBuilder& out, const BSONObj& pat){ - BSONElement e = pat.firstElement(); - if (e.type() == Object){ - BSONObjBuilder sub; - maxForPat(sub, e.embeddedObject()); - out.append(e.fieldName(), sub.obj()); - } else { - out.appendMaxKey(e.fieldName()); - } - } ShardKeyPattern::ShardKeyPattern( BSONObj p ) : pattern( p.getOwned() ) { pattern.getFieldNames(patternfields); BSONObjBuilder min; - minForPat(min, pattern); - gMin = min.obj(); - BSONObjBuilder max; - maxForPat(max, pattern); + + BSONObjIterator it(p); + while (it.more()){ + BSONElement e (it.next()); + min.appendMinKey(e.fieldName()); + max.appendMaxKey(e.fieldName()); + } + + gMin = min.obj(); gMax = max.obj(); } - int ShardKeyPattern::compare( const BSONObj& lObject , const BSONObj& rObject ) { + int ShardKeyPattern::compare( const BSONObj& lObject , const BSONObj& rObject ) const { BSONObj L = extractKey(lObject); - uassert( 10198 , "left object doesn't have shard key", !L.isEmpty()); + uassert( 10198 , "left object doesn't have full shard key", L.nFields() == (int)patternfields.size()); BSONObj R = extractKey(rObject); - uassert( 10199 , "right object doesn't have shard key", !R.isEmpty()); + uassert( 10199 , "right object doesn't have full shard key", R.nFields() == (int)patternfields.size()); return L.woCompare(R); } - bool ShardKeyPattern::hasShardKey( const BSONObj& obj ) { + bool ShardKeyPattern::hasShardKey( const BSONObj& obj ) const { /* this is written s.t. if obj has lots of fields, if the shard key fields are early, it is fast. so a bit more work to try to be semi-fast. */ - for(set::iterator it = patternfields.begin(); it != patternfields.end(); ++it){ + for(set::const_iterator it = patternfields.begin(); it != patternfields.end(); ++it){ if(obj.getFieldDotted(it->c_str()).eoo()) return false; } return true; } - /** @return true if shard s is relevant for query q. 
- - Example: - q: { x : 3 } - *this: { x : 1 } - s: x:2..x:7 - -> true - */ - - bool ShardKeyPattern::relevant(const BSONObj& query, const BSONObj& L, const BSONObj& R) { - BSONObj q = extractKey( query ); - if( q.isEmpty() ) - return true; - - BSONElement e = q.firstElement(); - assert( !e.eoo() ) ; - - if( e.type() == RegEx ) { - /* todo: if starts with ^, we could be smarter here */ - return true; - } - - if( e.type() == Object ) { - BSONObjIterator j(e.embeddedObject()); - BSONElement LE = L.firstElement(); // todo compound keys - BSONElement RE = R.firstElement(); // todo compound keys - while( 1 ) { - BSONElement f = j.next(); - if( f.eoo() ) - break; - int op = f.getGtLtOp(); - switch( op ) { - case BSONObj::LT: - if( f.woCompare(LE, false) <= 0 ) - return false; - break; - case BSONObj::LTE: - if( f.woCompare(LE, false) < 0 ) - return false; - break; - case BSONObj::GT: - case BSONObj::GTE: - if( f.woCompare(RE, false) >= 0 ) - return false; - break; - case BSONObj::opIN: - case BSONObj::NE: - case BSONObj::opSIZE: - massert( 10423 , "not implemented yet relevant()", false); - case BSONObj::Equality: - goto normal; - default: - massert( 10424 , "bad operator in relevant()?", false); - } - } - return true; - } -normal: - return L.woCompare(q) <= 0 && R.woCompare(q) > 0; - } - - bool ShardKeyPattern::relevantForQuery( const BSONObj& query , Chunk * chunk ){ - massert( 10425 , "not done for compound patterns", patternfields.size() == 1); - - bool rel = relevant(query, chunk->getMin(), chunk->getMax()); - if( ! hasShardKey( query ) ) - assert(rel); - - return rel; - } - - /** - returns a query that filters results only for the range desired, i.e. returns - { $gte : keyval(min), $lt : keyval(max) } - */ - void ShardKeyPattern::getFilter( BSONObjBuilder& b , const BSONObj& min, const BSONObj& max ){ - massert( 10426 , "not done for compound patterns", patternfields.size() == 1); - BSONObjBuilder temp; - temp.appendAs( extractKey(min).firstElement(), "$gte" ); - temp.appendAs( extractKey(max).firstElement(), "$lt" ); - - b.append( patternfields.begin()->c_str(), temp.obj() ); - } - - /** - Example - sort: { ts: -1 } - *this: { ts:1 } - -> -1 - - @return - 0 if sort either doesn't have all the fields or has extra fields - < 0 if sort is descending - > 1 if sort is ascending - */ - int ShardKeyPattern::canOrder( const BSONObj& sort ){ - // e.g.: - // sort { a : 1 , b : -1 } - // pattern { a : -1, b : 1, c : 1 } - // -> -1 - - int dir = 0; - - BSONObjIterator s(sort); - BSONObjIterator p(pattern); - while( 1 ) { - BSONElement e = s.next(); - if( e.eoo() ) - break; - if( !p.moreWithEOO() ) - return 0; - BSONElement ep = p.next(); - bool same = e == ep; - if( !same ) { - if( strcmp(e.fieldName(), ep.fieldName()) != 0 ) - return 0; - // same name, but opposite direction - if( dir == -1 ) - ; // ok - else if( dir == 1 ) - return 0; // wrong direction for a 2nd field - else // dir == 0, initial pass - dir = -1; - } - else { - // fields are the same - if( dir == -1 ) - return 0; // wrong direction - dir = 1; - } + bool ShardKeyPattern::isPrefixOf( const BSONObj& otherPattern ) const { + BSONObjIterator a( pattern ); + BSONObjIterator b( otherPattern ); + + while ( a.more() && b.more() ){ + BSONElement x = a.next(); + BSONElement y = b.next(); + if ( strcmp( x.fieldName() , y.fieldName() ) ) + return false; } - - return dir; + + return ! a.more(); } - + string ShardKeyPattern::toString() const { return pattern.toString(); } - + /* things to test for compound : - x hasshardkey - _ getFilter (hard?) 
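/* [editor's sketch, not part of the diff] isPrefixOf() above replaces the removed
   relevant()/canOrder machinery: it walks both patterns field by field and succeeds
   only when every shard key field is matched, in order, at the front of the other
   pattern. The same logic over plain field-name lists (hypothetical helper): */
#include <string>
#include <vector>

static bool isPrefixOfToy( const std::vector<std::string>& key ,
                           const std::vector<std::string>& other ){
    if ( key.size() > other.size() )
        return false;                       // analogue of "return ! a.more();"
    for ( size_t i = 0; i < key.size(); i++ )   // mirror of the BSONObjIterator walk
        if ( key[i] != other[i] )
            return false;
    return true;
}
// isPrefixOfToy({"x"}, {"x","a"}) == true; isPrefixOfToy({"x","y"}, {"x"}) == false
/* [end of sketch] */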
- _ relevantForQuery - x canOrder \ middle (deprecating?) */ class ShardKeyUnitTest : public UnitTest { public: + + void testIsPrefixOf(){ + { + ShardKeyPattern k( BSON( "x" << 1 ) ); + assert( ! k.isPrefixOf( BSON( "a" << 1 ) ) ); + assert( k.isPrefixOf( BSON( "x" << 1 ) ) ); + assert( k.isPrefixOf( BSON( "x" << 1 << "a" << 1 ) ) ); + assert( ! k.isPrefixOf( BSON( "a" << 1 << "x" << 1 ) ) ); + } + { + ShardKeyPattern k( BSON( "x" << 1 << "y" << 1 ) ); + assert( ! k.isPrefixOf( BSON( "x" << 1 ) ) ); + assert( ! k.isPrefixOf( BSON( "x" << 1 << "z" << 1 ) ) ); + assert( k.isPrefixOf( BSON( "x" << 1 << "y" << 1 ) ) ); + assert( k.isPrefixOf( BSON( "x" << 1 << "y" << 1 << "z" << 1 ) ) ); + } + } + void hasshardkeytest() { BSONObj x = fromjson("{ zid : \"abcdefg\", num: 1.0, name: \"eliot\" }"); ShardKeyPattern k( BSON( "num" << 1 ) ); @@ -244,40 +116,13 @@ } } - void rfq() { - ShardKeyPattern k( BSON( "key" << 1 ) ); - BSONObj q = BSON( "key" << 3 ); - Chunk c(0); - BSONObj z = fromjson("{ ns : \"alleyinsider.fs.chunks\" , min : {key:2} , max : {key:20} , server : \"localhost:30001\" }"); - c.unserialize(z); - assert( k.relevantForQuery(q, &c) ); - assert( k.relevantForQuery(fromjson("{foo:9,key:4}"), &c) ); - assert( !k.relevantForQuery(fromjson("{foo:9,key:43}"), &c) ); - assert( k.relevantForQuery(fromjson("{foo:9,key:{$gt:10}}"), &c) ); - assert( !k.relevantForQuery(fromjson("{foo:9,key:{$gt:22}}"), &c) ); - assert( k.relevantForQuery(fromjson("{foo:9}"), &c) ); - } - void getfilt() { - ShardKeyPattern k( BSON( "key" << 1 ) ); - BSONObjBuilder b; - k.getFilter(b, fromjson("{z:3,key:30}"), fromjson("{key:90}")); - BSONObj x = fromjson("{ key: { $gte: 30, $lt: 90 } }"); - assert( x.woEqual(b.obj()) ); - } - void testCanOrder() { - ShardKeyPattern k( fromjson("{a:1,b:-1,c:1}") ); - assert( k.canOrder( fromjson("{a:1}") ) == 1 ); - assert( k.canOrder( fromjson("{a:-1}") ) == -1 ); - assert( k.canOrder( fromjson("{a:1,b:-1,c:1}") ) == 1 ); - assert( k.canOrder( fromjson("{a:1,b:1}") ) == 0 ); - assert( k.canOrder( fromjson("{a:-1,b:1}") ) == -1 ); - } + void extractkeytest() { - ShardKeyPattern k( fromjson("{a:1,b:-1,c:1}") ); + ShardKeyPattern k( fromjson("{a:1,'sub.b':-1,'sub.c':1}") ); - BSONObj x = fromjson("{a:1,b:2,c:3}"); - assert( k.extractKey( fromjson("{a:1,b:2,c:3}") ).woEqual(x) ); - assert( k.extractKey( fromjson("{b:2,c:3,a:1}") ).woEqual(x) ); + BSONObj x = fromjson("{a:1,'sub.b':2,'sub.c':3}"); + assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).woEqual(x) ); + assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).woEqual(x) ); } void run(){ extractkeytest(); @@ -305,15 +150,11 @@ BSONObj b = BSON( "key" << 999 ); assert( k.compare(a,b) < 0 ); - - assert( k.canOrder( fromjson("{key:1}") ) == 1 ); - assert( k.canOrder( fromjson("{zz:1}") ) == 0 ); - assert( k.canOrder( fromjson("{key:-1}") ) == -1 ); - testCanOrder(); - getfilt(); - rfq(); + testIsPrefixOf(); // add middle multitype tests + + log(1) << "shardKeyTest passed" << endl; } } shardKeyTest; diff -Nru mongodb-1.4.4/s/shardkey.h mongodb-1.6.3/s/shardkey.h --- mongodb-1.4.4/s/shardkey.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/shardkey.h 2010-09-24 10:02:42.000000000 -0700 @@ -42,15 +42,15 @@ */ BSONObj globalMax() const { return gMax; } - bool isGlobalMin( const BSONObj& k ){ + bool isGlobalMin( const BSONObj& k ) const{ return k.woCompare( globalMin() ) == 0; } - bool isGlobalMax( const BSONObj& k ){ + bool isGlobalMax( const BSONObj& k ) const{ return k.woCompare( globalMax() ) == 0; } - bool 
isGlobal( const BSONObj& k ){ + bool isGlobal( const BSONObj& k ) const{ return isGlobalMin( k ) || isGlobalMax( k ); } @@ -59,46 +59,16 @@ l == r 0 l > r positive */ - int compare( const BSONObj& l , const BSONObj& r ); + int compare( const BSONObj& l , const BSONObj& r ) const; /** @return whether or not obj has all fields in this shard key pattern e.g. ShardKey({num:1}).hasShardKey({ name:"joe", num:3 }) is true */ - bool hasShardKey( const BSONObj& obj ); + bool hasShardKey( const BSONObj& obj ) const; - /** - returns a query that filters results only for the range desired, i.e. returns - { "field" : { $gte: keyval(min), $lt: keyval(max) } } - */ - void getFilter( BSONObjBuilder& b , const BSONObj& min, const BSONObj& max ); - - /** @return true if shard s is relevant for query q. - - Example: - q: { x : 3 } - *this: { x : 1 } - s: x:2..x:7 - -> true - */ - bool relevantForQuery( const BSONObj& q , Chunk * s ); - - /** - Returns if the given sort pattern can be ordered by the shard key pattern. - Example - sort: { ts: -1 } - *this: { ts:1 } - -> -1 - - @return - 0 if sort either doesn't have all the fields or has extra fields - < 0 if sort is descending - > 1 if sort is ascending - */ - int canOrder( const BSONObj& sort ); - - BSONObj key() { return pattern; } + BSONObj key() const { return pattern; } string toString() const; @@ -108,21 +78,25 @@ return patternfields.count( key ) > 0; } - operator string() const { - return pattern.toString(); - } + /** + * @return + * true if 'this' is a prefix (not necessarily contained) of 'otherPattern'. + */ + bool isPrefixOf( const BSONObj& otherPattern ) const; + private: BSONObj pattern; BSONObj gMin; BSONObj gMax; - /* question: better to have patternfields precomputed or not? depends on if we use copy contructor often. */ + /* question: better to have patternfields precomputed or not? depends on if we use copy constructor often. */ set patternfields; - bool relevant(const BSONObj& query, const BSONObj& L, const BSONObj& R); }; inline BSONObj ShardKeyPattern::extractKey(const BSONObj& from) const { - return from.extractFields(pattern); + BSONObj k = from.extractFields(pattern); + uassert(13334, "Shard Key must be less than 512 bytes", k.objsize() < 512); + return k; } } diff -Nru mongodb-1.4.4/s/s_only.cpp mongodb-1.6.3/s/s_only.cpp --- mongodb-1.4.4/s/s_only.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/s_only.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,11 +15,16 @@ * limitations under the License. 
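/* [editor's sketch, not part of the diff] extractKey() above pulls the pattern
   fields out of a document and, new in 1.6, uasserts (13334) that the extracted key
   stays under 512 bytes; compare() then just woCompares the two extracted keys,
   insisting both carry the full key (uasserts 10198/10199). A toy model of the
   extraction and full-key check (Doc and extractKeyToy are hypothetical): */
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

typedef std::map<std::string,int> Doc;   // stand-in for BSONObj, int values only

static std::vector<int> extractKeyToy( const Doc& d , const std::vector<std::string>& pattern ){
    std::vector<int> key;
    for ( size_t i = 0; i < pattern.size(); i++ ){
        Doc::const_iterator it = d.find( pattern[i] );
        if ( it == d.end() )   // cf. "object doesn't have full shard key"
            throw std::runtime_error( "object doesn't have full shard key" );
        key.push_back( it->second );
    }
    return key;   // lexicographic vector comparison plays the role of woCompare
}
/* [end of sketch] */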
*/ -#include "../stdafx.h" +#include "pch.h" #include "../client/dbclient.h" #include "../db/dbhelpers.h" #include "../db/matcher.h" +#include "../db/commands.h" +/* + most a pile of hacks to make linking nicer + + */ namespace mongo { auto_ptr Helpers::find( const char *ns , BSONObj query , bool requireIndex ){ @@ -28,6 +33,59 @@ return i; } - // need this stub to reduce mongos link dependencies - inline Matcher::~Matcher() { assert(!"this shouldn't be called"); } + boost::thread_specific_ptr currentClient; + + Client::Client(const char *desc , MessagingPort *p) : + _context(0), + _shutdown(false), + _desc(desc), + _god(0), + _lastOp(0), + _mp(p) + { + } + Client::~Client(){} + bool Client::shutdown(){ return true; } + + bool webHaveAdminUsers(){ + return false; + } + + BSONObj webGetAdminUser( const string& username ){ + return BSONObj(); + } + + bool execCommand( Command * c , + Client& client , int queryOptions , + const char *ns, BSONObj& cmdObj , + BSONObjBuilder& result, + bool fromRepl ){ + assert(c); + + string dbname = nsToDatabase( ns ); + + if ( cmdObj["help"].trueValue() ){ + stringstream ss; + ss << "help for: " << c->name << " "; + c->help( ss ); + result.append( "help" , ss.str() ); + result.append( "lockType" , c->locktype() ); + return true; + } + + if ( c->adminOnly() ){ + if ( dbname != "admin" ) { + result.append( "errmsg" , "access denied- use admin db" ); + log() << "command denied: " << cmdObj.toString() << endl; + return false; + } + log( 2 ) << "command: " << cmdObj << endl; + } + + string errmsg; + int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); + if ( ! ok ) + result.append( "errmsg" , errmsg ); + return ok; + } } diff -Nru mongodb-1.4.4/s/stats.cpp mongodb-1.6.3/s/stats.cpp --- mongodb-1.4.4/s/stats.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/stats.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,28 @@ +// stats.cpp + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "stats.h" + +namespace mongo { + + OpCounters opsNonSharded; + OpCounters opsSharded; + + GenericCounter shardedCursorTypes; +} diff -Nru mongodb-1.4.4/s/stats.h mongodb-1.6.3/s/stats.h --- mongodb-1.4.4/s/stats.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/s/stats.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,30 @@ +// stats.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ +#pragma once + +#include "../pch.h" +#include "../db/stats/counters.h" + +namespace mongo { + + extern OpCounters opsNonSharded; + extern OpCounters opsSharded; + + extern GenericCounter shardedCursorTypes; +} diff -Nru mongodb-1.4.4/s/strategy.cpp mongodb-1.6.3/s/strategy.cpp --- mongodb-1.4.4/s/strategy.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/strategy.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,77 +16,88 @@ // strategy.cpp -#include "stdafx.h" +#include "pch.h" #include "request.h" #include "../util/background.h" #include "../client/connpool.h" #include "../db/commands.h" + #include "server.h" +#include "grid.h" namespace mongo { // ----- Strategy ------ - void Strategy::doWrite( int op , Request& r , string server ){ - ScopedDbConnection dbcon( server ); - DBClientBase &_c = dbcon.conn(); - - /* TODO FIX - do not cast and call DBClientBase::say() */ - DBClientConnection&c = dynamic_cast<DBClientConnection&>(_c); - c.port().say( r.m() ); - - dbcon.done(); + void Strategy::doWrite( int op , Request& r , const Shard& shard , bool checkVersion ){ + ShardConnection conn( shard , r.getns() ); + if ( ! checkVersion ) + conn.donotCheckVersion(); + else if ( conn.setVersion() ){ + conn.done(); + throw StaleConfigException( r.getns() , "doWrite" , true ); + } + conn->say( r.m() ); + conn.done(); } - - void Strategy::doQuery( Request& r , string server ){ + + void Strategy::doQuery( Request& r , const Shard& shard ){ try{ - ScopedDbConnection dbcon( server ); + ShardConnection dbcon( shard , r.getns() ); DBClientBase &c = dbcon.conn(); - checkShardVersion( c , r.getns() ); - Message response; bool ok = c.call( r.m(), response); { - QueryResult *qr = (QueryResult *) response.data; + QueryResult *qr = (QueryResult *) response.singleData(); - if ( qr->resultFlags() & QueryResult::ResultFlag_ShardConfigStale ){ + if ( qr->resultFlags() & ResultFlag_ShardConfigStale ){ dbcon.done(); throw StaleConfigException( r.getns() , "Strategy::doQuery" ); } } uassert( 10200 , "mongos: error calling db", ok); - r.reply( response ); + r.reply( response , c.getServerAddress() ); dbcon.done(); } catch ( AssertionException& e ) { BSONObjBuilder err; - err.append("$err", string("mongos: ") + (e.msg.empty() ?
"assertion during query" : e.msg)); + e.getInfo().append( err ); BSONObj errObj = err.done(); - replyToQuery(QueryResult::ResultFlag_ErrSet, r.p() , r.m() , errObj); + replyToQuery(ResultFlag_ErrSet, r.p() , r.m() , errObj); } } - void Strategy::insert( string server , const char * ns , const BSONObj& obj ){ - ScopedDbConnection dbcon( server ); - checkShardVersion( dbcon.conn() , ns ); + void Strategy::insert( const Shard& shard , const char * ns , const BSONObj& obj ){ + ShardConnection dbcon( shard , ns ); + if ( dbcon.setVersion() ){ + dbcon.done(); + throw StaleConfigException( ns , "for insert" ); + } dbcon->insert( ns , obj ); dbcon.done(); } - map checkShardVersionLastSequence; - class WriteBackListener : public BackgroundJob { protected: - + string name() { return "WriteBackListener"; } WriteBackListener( const string& addr ) : _addr( addr ){ - cout << "creating WriteBackListener for: " << addr << endl; + log() << "creating WriteBackListener for: " << addr << endl; } void run(){ + OID lastID; + lastID.clear(); int secsToSleep = 0; - while ( 1 ){ + while ( Shard::isMember( _addr ) ){ + + if ( lastID.isSet() ){ + scoped_lock lk( _seenWritebacksLock ); + _seenWritebacks.insert( lastID ); + lastID.clear(); + } + try { ScopedDbConnection conn( _addr ); @@ -94,7 +105,7 @@ { BSONObjBuilder cmd; - cmd.appendOID( "writebacklisten" , &serverID ); + cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data if ( ! conn->runCommand( "admin" , cmd.obj() , result ) ){ log() << "writebacklisten command failed! " << result << endl; conn.done(); @@ -108,15 +119,35 @@ BSONObj data = result.getObjectField( "data" ); if ( data.getBoolField( "writeBack" ) ){ string ns = data["ns"].valuestrsafe(); - + { + BSONElement e = data["id"]; + if ( e.type() == jstOID ) + lastID = e.OID(); + } int len; Message m( (void*)data["msg"].binData( len ) , false ); - massert( 10427 , "invalid writeback message" , m.data->valid() ); + massert( 10427 , "invalid writeback message" , m.header()->valid() ); - grid.getDBConfig( ns )->getChunkManager( ns , true ); + DBConfigPtr db = grid.getDBConfig( ns ); + ShardChunkVersion needVersion( data["version"] ); + + log(1) << "writeback id: " << lastID << " needVersion : " << needVersion.toString() + << " mine : " << db->getChunkManager( ns )->getVersion().toString() << endl;// TODO change to log(3) + + if ( logLevel ) log(1) << debugString( m ) << endl; + + if ( needVersion.isSet() && needVersion <= db->getChunkManager( ns )->getVersion() ){ + // this means when the write went originally, the version was old + // if we're here, it means we've already updated the config, so don't need to do again + //db->getChunkManager( ns , true ); // SERVER-1349 + } + else { + db->getChunkManager( ns , true ); + } Request r( m , 0 ); + r.init(); r.process(); } else { @@ -125,9 +156,13 @@ conn.done(); secsToSleep = 0; + continue; } catch ( std::exception e ){ log() << "WriteBackListener exception : " << e.what() << endl; + + // It's possible this shard was removed + Shard::reloadShardInfo(); } catch ( ... ){ log() << "WriteBackListener uncaught exception!" 
<< endl; @@ -137,99 +172,154 @@ if ( secsToSleep > 10 ) secsToSleep = 0; } + + log() << "WriteBackListener exiting : address no longer in cluster " << _addr; + } private: string _addr; - static map _cache; + static map _cache; + static mongo::mutex _cacheLock; + + static set _seenWritebacks; + static mongo::mutex _seenWritebacksLock; + public: static void init( DBClientBase& conn ){ + scoped_lock lk( _cacheLock ); WriteBackListener*& l = _cache[conn.getServerAddress()]; if ( l ) return; l = new WriteBackListener( conn.getServerAddress() ); l->go(); } + + static void waitFor( const OID& oid ){ + Timer t; + for ( int i=0; i<5000; i++ ){ + { + scoped_lock lk( _seenWritebacksLock ); + if ( _seenWritebacks.count( oid ) ) + return; + } + sleepmillis( 10 ); + } + stringstream ss; + ss << "didn't get writeback for: " << oid << " after: " << t.millis() << " ms"; + uasserted( 13403 , ss.str() ); + } }; - map WriteBackListener::_cache; + void waitForWriteback( const OID& oid ){ + WriteBackListener::waitFor( oid ); + } + map WriteBackListener::_cache; + mongo::mutex WriteBackListener::_cacheLock("WriteBackListener"); - void checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative ){ + set WriteBackListener::_seenWritebacks; + mongo::mutex WriteBackListener::_seenWritebacksLock( "WriteBackListener::seen" ); + + struct ConnectionShardStatus { + + typedef unsigned long long S; + + ConnectionShardStatus() + : _mutex( "ConnectionShardStatus" ){ + } + + S getSequence( DBClientBase * conn , const string& ns ){ + scoped_lock lk( _mutex ); + return _map[conn][ns]; + } + + void setSequence( DBClientBase * conn , const string& ns , const S& s ){ + scoped_lock lk( _mutex ); + _map[conn][ns] = s; + } + + void reset( DBClientBase * conn ){ + scoped_lock lk( _mutex ); + _map.erase( conn ); + } + + map > _map; + mongo::mutex _mutex; + } connectionShardStatus; + + void resetShardVersion( DBClientBase * conn ){ + connectionShardStatus.reset( conn ); + } + + /** + * @return true if had to do something + */ + bool checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative , int tryNumber ){ // TODO: cache, optimize, etc... WriteBackListener::init( conn ); - DBConfig * conf = grid.getDBConfig( ns ); + DBConfigPtr conf = grid.getDBConfig( ns ); if ( ! 
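/* [editor's sketch, not part of the diff] ConnectionShardStatus above memoizes, per
   connection and namespace, the last config sequence number sent, so setShardVersion
   is only re-issued when the ChunkManager has actually moved on. The cache shape in
   isolation (SeqCache is hypothetical; the real class guards _map with mongo::mutex): */
#include <map>
#include <string>

class SeqCache {
public:
    typedef unsigned long long S;
    S get( const void* conn , const std::string& ns ) { return _map[conn][ns]; }
    void set( const void* conn , const std::string& ns , S s ) { _map[conn][ns] = s; }
    void reset( const void* conn ) { _map.erase( conn ); }   // cf. resetShardVersion()
private:
    std::map< const void* , std::map<std::string,S> > _map;  // conn -> ns -> sequence
};
/* [end of sketch] */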
conf ) - return; + return false; - ShardChunkVersion version = 0; unsigned long long officialSequenceNumber = 0; - - if ( conf->isSharded( ns ) ){ - ChunkManager * manager = conf->getChunkManager( ns , authoritative ); + + ChunkManagerPtr manager; + const bool isSharded = conf->isSharded( ns ); + if ( isSharded ){ + manager = conf->getChunkManager( ns , authoritative ); officialSequenceNumber = manager->getSequenceNumber(); - version = manager->getVersion( conn.getServerAddress() ); } - unsigned long long & sequenceNumber = checkShardVersionLastSequence[ &conn ]; - if ( officialSequenceNumber == sequenceNumber ) - return; - - log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber << endl; + unsigned long long sequenceNumber = connectionShardStatus.getSequence(&conn,ns); + if ( sequenceNumber == officialSequenceNumber ){ + return false; + } + + ShardChunkVersion version = 0; + if ( isSharded ){ + version = manager->getVersion( Shard::make( conn.getServerAddress() ) ); + } + + log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns + << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber + << " version: " << version << " manager: " << manager.get() + << endl; + BSONObj result; if ( setShardVersion( conn , ns , version , authoritative , result ) ){ // success! log(1) << " setShardVersion success!" << endl; - sequenceNumber = officialSequenceNumber; - return; + connectionShardStatus.setSequence( &conn , ns , officialSequenceNumber ); + return true; } - + log(1) << " setShardVersion failed!\n" << result << endl; if ( result.getBoolField( "need_authoritative" ) ) massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); if ( ! authoritative ){ - checkShardVersion( conn , ns , 1 ); - return; + checkShardVersion( conn , ns , 1 , tryNumber + 1 ); + return true; } - log(1) << " setShardVersion failed: " << result << endl; - massert( 10429 , "setShardVersion failed!" , 0 ); - } - - bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ){ - - BSONObjBuilder cmdBuilder; - cmdBuilder.append( "setShardVersion" , ns.c_str() ); - cmdBuilder.append( "configdb" , configServer.modelServer() ); - cmdBuilder.appendTimestamp( "version" , version ); - cmdBuilder.appendOID( "serverID" , &serverID ); - if ( authoritative ) - cmdBuilder.appendBool( "authoritative" , 1 ); - BSONObj cmd = cmdBuilder.obj(); - - log(1) << " setShardVersion " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl; - - return conn.runCommand( "admin" , cmd , result ); - } - - bool lockNamespaceOnServer( const string& server , const string& ns ){ - ScopedDbConnection conn( server ); - bool res = lockNamespaceOnServer( conn.conn() , ns ); - conn.done(); - return res; - } + if ( tryNumber < 4 ){ + log(1) << "going to retry checkShardVersion" << endl; + sleepmillis( 10 ); + checkShardVersion( conn , ns , 1 , tryNumber + 1 ); + return true; + } - bool lockNamespaceOnServer( DBClientBase& conn , const string& ns ){ - BSONObj lockResult; - return setShardVersion( conn , ns , grid.getNextOpTime() , true , lockResult ); + log() << " setShardVersion failed: " << result << endl; + massert( 10429 , (string)"setShardVersion failed! 
" + result.jsonString() , 0 ); + return true; } - + } diff -Nru mongodb-1.4.4/s/strategy.h mongodb-1.6.3/s/strategy.h --- mongodb-1.4.4/s/strategy.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/strategy.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "chunk.h" #include "request.h" @@ -33,10 +33,10 @@ virtual void writeOp( int op , Request& r ) = 0; protected: - void doWrite( int op , Request& r , string server ); - void doQuery( Request& r , string server ); + void doWrite( int op , Request& r , const Shard& shard , bool checkVersion = true ); + void doQuery( Request& r , const Shard& shard ); - void insert( string server , const char * ns , const BSONObj& obj ); + void insert( const Shard& shard , const char * ns , const BSONObj& obj ); }; @@ -44,9 +44,7 @@ extern Strategy * SHARDED; bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ); - - bool lockNamespaceOnServer( const string& server , const string& ns ); - bool lockNamespaceOnServer( DBClientBase& conn , const string& ns ); - + + void waitForWriteback( const OID& oid ); } diff -Nru mongodb-1.4.4/s/strategy_shard.cpp mongodb-1.6.3/s/strategy_shard.cpp --- mongodb-1.4.4/s/strategy_shard.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/strategy_shard.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,10 +16,12 @@ // strategy_sharded.cpp -#include "stdafx.h" +#include "pch.h" #include "request.h" #include "chunk.h" #include "cursors.h" +#include "stats.h" + #include "../client/connpool.h" #include "../db/commands.h" @@ -37,28 +39,25 @@ if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); - ChunkManager * info = r.getChunkManager(); + ChunkManagerPtr info = r.getChunkManager(); assert( info ); Query query( q.query ); - vector shards; - info->getChunksForQuery( shards , query.getFilter() ); + set shards; + info->getShardsForQuery( shards , query.getFilter() ); set servers; - map serverCounts; - for ( vector::iterator i = shards.begin(); i != shards.end(); i++ ){ - servers.insert( ServerAndQuery( (*i)->getShard() , (*i)->getFilter() ) ); - int& num = serverCounts[(*i)->getShard()]; - num++; + for ( set::iterator i = shards.begin(); i != shards.end(); i++ ){ + servers.insert( ServerAndQuery( i->getConnString() , BSONObj() ) ); } if ( logLevel > 4 ){ StringBuilder ss; - ss << " shard query servers: " << servers.size() << "\n"; + ss << " shard query servers: " << servers.size() << '\n'; for ( set::iterator i = servers.begin(); i!=servers.end(); i++ ){ const ServerAndQuery& s = *i; - ss << " " << s.toString() << "\n"; + ss << " " << s.toString() << '\n'; } log() << ss.str(); } @@ -68,33 +67,33 @@ BSONObj sort = query.getSort(); if ( sort.isEmpty() ){ - // 1. no sort, can just hit them in serial cursor = new SerialServerClusteredCursor( servers , q ); } else { - int shardKeyOrder = info->getShardKey().canOrder( sort ); - if ( shardKeyOrder ){ - // 2. sort on shard key, can do in serial intelligently - set buckets; - for ( vector::iterator i = shards.begin(); i != shards.end(); i++ ){ - Chunk * s = *i; - buckets.insert( ServerAndQuery( s->getShard() , s->getFilter() , s->getMin() ) ); - } - cursor = new SerialServerClusteredCursor( buckets , q , shardKeyOrder ); - } - else { - // 3. 
sort on non-sharded key, pull back a portion from each server and iterate slowly - cursor = new ParallelSortClusteredCursor( servers , q , sort ); - } + cursor = new ParallelSortClusteredCursor( servers , q , sort ); } assert( cursor ); + + try { + cursor->init(); + + log(5) << " cursor type: " << cursor->type() << endl; + shardedCursorTypes.hit( cursor->type() ); - log(5) << " cursor type: " << cursor->type() << endl; + if ( query.isExplain() ){ + BSONObj explain = cursor->explain(); + replyToQuery( 0 , r.p() , r.m() , explain ); + delete( cursor ); + return; + } + } catch(...) { + delete cursor; + throw; + } - ShardedClientCursor * cc = new ShardedClientCursor( q , cursor ); + ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); if ( ! cc->sendNextBatch( r ) ){ - delete( cursor ); return; } log(6) << "storing cursor : " << cc->getId() << endl; @@ -107,23 +106,24 @@ log(6) << "want cursor : " << id << endl; - ShardedClientCursor * cursor = cursorCache.get( id ); + ShardedClientCursorPtr cursor = cursorCache.get( id ); if ( ! cursor ){ log(6) << "\t invalid cursor :(" << endl; - replyToQuery( QueryResult::ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 ); + replyToQuery( ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 ); return; } if ( cursor->sendNextBatch( r , ntoreturn ) ){ - log(6) << "\t cursor finished: " << id << endl; + // still more data + cursor->accessed(); return; } - delete( cursor ); + // we've exhausted the cursor cursorCache.remove( id ); } - void _insert( Request& r , DbMessage& d, ChunkManager* manager ){ + void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){ while ( d.moreJSObjs() ){ BSONObj o = d.nextJsObj(); @@ -146,15 +146,32 @@ } - Chunk& c = manager->findChunk( o ); - log(4) << " server:" << c.getShard() << " " << o << endl; - insert( c.getShard() , r.getns() , o ); - - c.splitIfShould( o.objsize() ); + bool gotThrough = false; + for ( int i=0; i<10; i++ ){ + try { + ChunkPtr c = manager->findChunk( o ); + log(4) << " server:" << c->getShard().toString() << " " << o << endl; + insert( c->getShard() , r.getns() , o ); + + r.gotInsert(); + c->splitIfShould( o.objsize() ); + gotThrough = true; + break; + } + catch ( StaleConfigException& ){ + log(1) << "retrying insert because of StaleConfigException: " << o << endl; + r.reset(); + manager = r.getChunkManager(); + } + sleepmillis( i * 200 ); + } + + assert( gotThrough ); + } } - void _update( Request& r , DbMessage& d, ChunkManager* manager ){ + void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){ int flags = d.pullInt(); BSONObj query = d.nextJsObj(); @@ -166,8 +183,7 @@ bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; - if ( multi ) - uassert( 10202 , "can't mix multi and upsert and sharding" , ! upsert ); + uassert( 10202 , "can't mix multi and upsert and sharding" , ! ( upsert && multi ) ); if ( upsert && !(manager->hasShardKey(toupdate) || (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))) @@ -179,8 +195,8 @@ if ( ! manager->hasShardKey( query ) ){ if ( multi ){ } - else if ( query.nFields() != 1 || strcmp( query.firstElement().fieldName() , "_id" ) ){ - throw UserException( 8013 , "can't do update with query that doesn't have the shard key" ); + else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ){ + throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" ); } else { save = true; @@ -191,34 +207,57 @@ if ( ! 
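/* [editor's sketch, not part of the diff] The insert path above now retries up to
   10 times on StaleConfigException, refreshing its ChunkManager and backing off
   linearly via sleepmillis(i*200). The generic shape of that loop (retryStale and
   StaleConfig are hypothetical stand-ins): */
#include <stdexcept>

struct StaleConfig : std::runtime_error {
    StaleConfig() : std::runtime_error( "stale config" ) {}
};

template <typename Op, typename Refresh, typename Sleep>
static void retryStale( Op op , Refresh refresh , Sleep sleepMs , int attempts = 10 ){
    for ( int i = 0; i < attempts; i++ ){
        try { op(); return; }                 // success: done
        catch ( StaleConfig& ) { refresh(); } // reload routing info, then retry
        sleepMs( i * 200 );                   // linear backoff, as in _insert above
    }
    throw StaleConfig();                      // out of attempts
}
/* [end of sketch] */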
save ){ if ( toupdate.firstElement().fieldName()[0] == '$' ){ - // TODO: check for $set, etc.. on shard key - } - else if ( manager->hasShardKey( toupdate ) && manager->getShardKey().compare( query , toupdate ) ){ - throw UserException( 8014 , "change would move shards!" ); + BSONObjIterator ops(toupdate); + while(ops.more()){ + BSONElement op(ops.next()); + if (op.type() != Object) + continue; + BSONObjIterator fields(op.embeddedObject()); + while(fields.more()){ + const string field = fields.next().fieldName(); + uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field)); + } + } + } else if ( manager->hasShardKey( toupdate ) ){ + uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 ); + } else { + uasserted(12376, "shard key must be in update object"); } } if ( multi ){ - vector chunks; - manager->getChunksForQuery( chunks , chunkFinder ); - set seen; - for ( vector::iterator i=chunks.begin(); i!=chunks.end(); i++){ - Chunk * c = *i; - if ( seen.count( c->getShard() ) ) - continue; - doWrite( dbUpdate , r , c->getShard() ); - seen.insert( c->getShard() ); + set shards; + manager->getShardsForQuery( shards , chunkFinder ); + int * x = (int*)(r.d().afterNS()); + x[0] |= UpdateOption_Broadcast; + for ( set::iterator i=shards.begin(); i!=shards.end(); i++){ + doWrite( dbUpdate , r , *i , false ); } } else { - Chunk& c = manager->findChunk( chunkFinder ); - doWrite( dbUpdate , r , c.getShard() ); - c.splitIfShould( d.msg().data->dataLen() ); + int left = 5; + while ( true ){ + try { + ChunkPtr c = manager->findChunk( chunkFinder ); + doWrite( dbUpdate , r , c->getShard() ); + c->splitIfShould( d.msg().header()->dataLen() ); + break; + } + catch ( StaleConfigException& e ){ + if ( left <= 0 ) + throw e; + left--; + log() << "update failed b/c of StaleConfigException, retrying " + << " left:" << left << " ns: " << r.getns() << " query: " << query << endl; + r.reset( false ); + manager = r.getChunkManager(); + } + } } } - void _delete( Request& r , DbMessage& d, ChunkManager* manager ){ + void _delete( Request& r , DbMessage& d, ChunkManagerPtr manager ){ int flags = d.pullInt(); bool justOne = flags & 1; @@ -226,24 +265,38 @@ uassert( 10203 , "bad delete message" , d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); - vector chunks; - manager->getChunksForQuery( chunks , pattern ); - cout << "delete : " << pattern << " \t " << chunks.size() << " justOne: " << justOne << endl; - if ( chunks.size() == 1 ){ - doWrite( dbDelete , r , chunks[0]->getShard() ); - return; + set shards; + int left = 5; + + while ( true ){ + try { + manager->getShardsForQuery( shards , pattern ); + log(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl; + if ( shards.size() == 1 ){ + doWrite( dbDelete , r , *shards.begin() ); + return; + } + break; + } + catch ( StaleConfigException& e ){ + if ( left <= 0 ) + throw e; + left--; + log() << "delete failed b/c of StaleConfigException, retrying " + << " left:" << left << " ns: " << r.getns() << " patt: " << pattern << endl; + r.reset( false ); + shards.clear(); + manager = r.getChunkManager(); + } } if ( justOne && ! 
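/* [editor's sketch, not part of the diff] For $-operator updates, the new code above
   iterates every operator sub-object and refuses any modified field that belongs to
   the shard key (uassert 13123). The field scan against a simple membership set
   (checkMods and ModFields are hypothetical): */
#include <map>
#include <set>
#include <stdexcept>
#include <string>

typedef std::map<std::string,int> ModFields;   // toy payload of e.g. {$set:{a:1,b:2}}

static void checkMods( const std::map<std::string,ModFields>& ops ,
                       const std::set<std::string>& shardKeyFields ){
    for ( std::map<std::string,ModFields>::const_iterator op = ops.begin(); op != ops.end(); ++op )
        for ( ModFields::const_iterator f = op->second.begin(); f != op->second.end(); ++f )
            if ( shardKeyFields.count( f->first ) )        // cf. partOfShardKey(field)
                throw std::runtime_error( "Can't modify shard key's value" );
}
/* [end of sketch] */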
pattern.hasField( "_id" ) ) throw UserException( 8015 , "can only delete with a non-shard key pattern if can delete as many as we find" ); - set seen; - for ( vector::iterator i=chunks.begin(); i!=chunks.end(); i++){ - Chunk * c = *i; - if ( seen.count( c->getShard() ) ) - continue; - seen.insert( c->getShard() ); - doWrite( dbDelete , r , c->getShard() ); + for ( set::iterator i=shards.begin(); i!=shards.end(); i++){ + int * x = (int*)(r.d().afterNS()); + x[0] |= RemoveOption_Broadcast; + doWrite( dbDelete , r , *i , false ); } } @@ -252,7 +305,7 @@ log(3) << "write: " << ns << endl; DbMessage& d = r.d(); - ChunkManager * info = r.getChunkManager(); + ChunkManagerPtr info = r.getChunkManager(); assert( info ); if ( op == dbInsert ){ diff -Nru mongodb-1.4.4/s/strategy_single.cpp mongodb-1.6.3/s/strategy_single.cpp --- mongodb-1.4.4/s/strategy_single.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/strategy_single.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ // strategy_simple.cpp -#include "stdafx.h" +#include "pch.h" #include "request.h" #include "../client/connpool.h" #include "../db/commands.h" @@ -40,29 +40,59 @@ log(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << endl; try { - if ( ( q.ntoreturn == -1 || q.ntoreturn == 1 ) && strstr(q.ns, ".$cmd") ) { - BSONObjBuilder builder; - bool ok = Command::runAgainstRegistered(q.ns, q.query, builder); - if ( ok ) { - BSONObj x = builder.done(); - replyToQuery(0, r.p(), r.m(), x); + if ( r.isCommand() ){ + + if ( handleSpecialNamespaces( r , q ) ) return; + + int loops = 5; + while ( true ){ + BSONObjBuilder builder; + try { + bool ok = Command::runAgainstRegistered(q.ns, q.query, builder); + if ( ok ) { + BSONObj x = builder.done(); + replyToQuery(0, r.p(), r.m(), x); + return; + } + break; + } + catch ( StaleConfigException& e ){ + if ( loops <= 0 ) + throw e; + + loops--; + log() << "retrying command: " << q.query << endl; + ShardConnection::checkMyConnectionVersions( e.getns() ); + } + catch ( AssertionException& e ){ + e.getInfo().append( builder , "assertion" , "assertionCode" ); + builder.append( "errmsg" , "db assertion failure" ); + builder.append( "ok" , 0 ); + BSONObj x = builder.done(); + replyToQuery(0, r.p(), r.m(), x); + return; + } } string commandName = q.query.firstElement().fieldName(); - if ( ! _commandsSafeToPass.count( commandName ) ) - log() << "passing through unknown command: " << commandName << " " << q.query << endl; - } + uassert(13390, "unrecognized command: " + commandName, _commandsSafeToPass.count(commandName) != 0); + } + lateAssert = true; - doQuery( r , r.singleServerName() ); + doQuery( r , r.primaryShard() ); } catch ( AssertionException& e ) { - assert( !lateAssert ); + if ( lateAssert ){ + log() << "lateAssert: " << e.getInfo() << endl; + assert( !lateAssert ); + } + BSONObjBuilder err; - err.append("$err", string("mongos: ") + (e.msg.empty() ? 
"assertion during query" : e.msg)); + e.getInfo().append( err ); BSONObj errObj = err.done(); - replyToQuery(QueryResult::ResultFlag_ErrSet, r.p() , r.m() , errObj); + replyToQuery(ResultFlag_ErrSet, r.p() , r.m() , errObj); return; } @@ -73,18 +103,14 @@ log(3) << "single getmore: " << ns << endl; - ScopedDbConnection dbcon( r.singleServerName() ); - DBClientBase& _c = dbcon.conn(); - - // TODO - DBClientConnection &c = dynamic_cast(_c); + ShardConnection conn( r.primaryShard() , ns ); Message response; - bool ok = c.port().call( r.m() , response); + bool ok = conn->callRead( r.m() , response); uassert( 10204 , "dbgrid: getmore: error calling db", ok); - r.reply( response ); + r.reply( response , conn->getServerAddress() ); - dbcon.done(); + conn.done(); } @@ -97,18 +123,26 @@ BSONObj o = d.nextJsObj(); const char * ns = o["ns"].valuestr(); if ( r.getConfig()->isSharded( ns ) ){ + BSONObj newIndexKey = o["key"].embeddedObjectUserCheck(); + uassert( 10205 , (string)"can't use unique indexes with sharding ns:" + ns + " key: " + o["key"].embeddedObjectUserCheck().toString() , - IndexDetails::isIdIndexPattern( o["key"].embeddedObjectUserCheck() ) || - ! o["unique"].trueValue() ); - ChunkManager * cm = r.getConfig()->getChunkManager( ns ); + IndexDetails::isIdIndexPattern( newIndexKey ) || + ! o["unique"].trueValue() || + r.getConfig()->getChunkManager( ns )->getShardKey().isPrefixOf( newIndexKey ) ); + + ChunkManagerPtr cm = r.getConfig()->getChunkManager( ns ); assert( cm ); - for ( int i=0; inumChunks();i++) - doWrite( op , r , cm->getChunk(i)->getShard() ); + + set shards; + cm->getAllShards(shards); + for (set::const_iterator it=shards.begin(), end=shards.end(); it != end; ++it) + doWrite( op , r , *it ); } else { - doWrite( op , r , r.singleServerName() ); + doWrite( op , r , r.primaryShard() ); } + r.gotInsert(); } } else if ( op == dbUpdate ){ @@ -129,15 +163,104 @@ const char *ns = r.getns(); if ( r.isShardingEnabled() && - strstr( ns , ".system.indexes" ) == strstr( ns , "." ) && - strstr( ns , "." ) ){ + strstr( ns , ".system.indexes" ) == strchr( ns , '.' ) && + strchr( ns , '.' ) ) { log(1) << " .system.indexes write for: " << ns << endl; handleIndexWrite( op , r ); return; } log(3) << "single write: " << ns << endl; - doWrite( op , r , r.singleServerName() ); + doWrite( op , r , r.primaryShard() ); + r.gotInsert(); // Won't handle mulit-insert correctly. Not worth parsing the request. + } + + bool handleSpecialNamespaces( Request& r , QueryMessage& q ){ + const char * ns = r.getns(); + ns = strstr( r.getns() , ".$cmd.sys." ); + if ( ! ns ) + return false; + ns += 10; + + BSONObjBuilder b; + vector shards; + + if ( strcmp( ns , "inprog" ) == 0 ){ + Shard::getAllShards( shards ); + + BSONArrayBuilder arr( b.subarrayStart( "inprog" ) ); + + for ( unsigned i=0; ifindOne( r.getns() , BSONObj() ); + if ( temp["inprog"].isABSONObj() ){ + BSONObjIterator i( temp["inprog"].Obj() ); + while ( i.more() ){ + BSONObjBuilder x; + + BSONObjIterator j( i.next().Obj() ); + while( j.more() ){ + BSONElement e = j.next(); + if ( strcmp( e.fieldName() , "opid" ) == 0 ){ + stringstream ss; + ss << shard.getName() << ':' << e.numberInt(); + x.append( "opid" , ss.str() ); + } + else { + x.append( e ); + } + } + arr.append( x.obj() ); + } + } + conn.done(); + } + + arr.done(); + } + else if ( strcmp( ns , "killop" ) == 0 ){ + BSONElement e = q.query["op"]; + if ( strstr( r.getns() , "admin." 
) != 0 ){ + b.append( "err" , "unauthorized" ); + } + else if ( e.type() != String ){ + b.append( "err" , "bad op" ); + b.append( e ); + } + else { + b.append( e ); + string s = e.String(); + string::size_type i = s.find( ':' ); + if ( i == string::npos ){ + b.append( "err" , "bad opid" ); + } + else { + string shard = s.substr( 0 , i ); + int opid = atoi( s.substr( i + 1 ).c_str() ); + b.append( "shard" , shard ); + b.append( "shardid" , opid ); + + log() << "want to kill op: " << e << endl; + Shard s(shard); + + ScopedDbConnection conn( s ); + conn->findOne( r.getns() , BSON( "op" << opid ) ); + conn.done(); + } + } + } + else if ( strcmp( ns , "unlock" ) == 0 ){ + b.append( "err" , "can't do unlock through mongos" ); + } + else { + log( LL_WARNING ) << "unknown sys command [" << ns << "]" << endl; + return false; + } + + BSONObj x = b.done(); + replyToQuery(0, r.p(), r.m(), x); + return true; } set _commandsSafeToPass; diff -Nru mongodb-1.4.4/s/util.h mongodb-1.6.3/s/util.h --- mongodb-1.4.4/s/util.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/s/util.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,35 +18,139 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../client/dbclient.h" +#include "../db/jsobj.h" /** some generic sharding utils that can be used in mongod or mongos */ namespace mongo { + + struct ShardChunkVersion { + union { + struct { + int _minor; + int _major; + }; + unsigned long long _combined; + }; + + ShardChunkVersion( int major=0, int minor=0 ) + : _minor(minor),_major(major){ + } + + ShardChunkVersion( unsigned long long ll ) + : _combined( ll ){ + } + + ShardChunkVersion( const BSONElement& e ){ + if ( e.type() == Date || e.type() == Timestamp ){ + _combined = e._numberLong(); + } + else if ( e.eoo() ){ + _combined = 0; + } + else { + log() << "ShardChunkVersion can't handle type (" << (int)(e.type()) << ") " << e << endl; + assert(0); + } + } + + void inc( bool major ){ + if ( major ) + incMajor(); + else + incMinor(); + } + + void incMajor() { + _major++; + _minor = 0; + } + + void incMinor() { + _minor++; + } + + unsigned long long toLong() const { + return _combined; + } + + bool isSet() const { + return _combined > 0; + } + + string toString() const { + stringstream ss; + ss << _major << "|" << _minor; + return ss.str(); + } + + int majorVersion() const { return _major; } + int minorVersion() const { return _minor; } + + operator unsigned long long() const { return _combined; } + + ShardChunkVersion& operator=( const BSONElement& elem ){ + switch ( elem.type() ){ + case Timestamp: + case NumberLong: + case Date: + _combined = elem._numberLong(); + break; + case EOO: + _combined = 0; + break; + default: + assert(0); + } + return *this; + } + }; + inline ostream& operator<<( ostream &s , const ShardChunkVersion& v){ + s << v._major << "|" << v._minor; + return s; + } + /** - your config info for a given shard/chunk is out of date */ - class StaleConfigException : public std::exception { + * your config info for a given shard/chunk is out of date + */ + class StaleConfigException : public AssertionException { public: - StaleConfigException( const string& ns , const string& msg){ - stringstream s; - s << "StaleConfigException ns: " << ns << " " << msg; - _msg = s.str(); - log(1) << _msg << endl; + StaleConfigException( const string& ns , const string& raw , bool justConnection = false ) + : AssertionException( (string)"ns: " + ns + " " + raw , 9996 ) , + _justConnection(justConnection) , + _ns(ns){ } - + virtual ~StaleConfigException() 
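/* [editor's sketch, not part of the diff] ShardChunkVersion above overlays a
   (major, minor) pair of ints on one unsigned long long; with _minor in the low
   32 bits (the little-endian layout the union relies on), comparing the combined
   value orders versions by major first, then minor. The equivalent explicit
   packing (combineVersion is hypothetical): */
#include <cassert>

static unsigned long long combineVersion( unsigned major , unsigned minor ){
    return ( (unsigned long long)major << 32 ) | minor;
}

static void combineVersionDemo(){
    assert( combineVersion( 2 , 0 ) > combineVersion( 1 , 9 ) ); // major dominates
    assert( combineVersion( 1 , 3 ) > combineVersion( 1 , 2 ) ); // minor breaks ties
}
/* [end of sketch] */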
throw(){} - virtual const char* what() const throw(){ - return _msg.c_str(); + virtual void appendPrefix( stringstream& ss ) const { ss << "StaleConfigException: "; } + + bool justConnection() const { return _justConnection; } + + string getns() const { return _ns; } + + static bool parse( const string& big , string& ns , string& raw ){ + string::size_type start = big.find( '[' ); + if ( start == string::npos ) + return false; + string::size_type end = big.find( ']' ,start ); + if ( end == string::npos ) + return false; + + ns = big.substr( start + 1 , ( end - start ) - 1 ); + raw = big.substr( end + 1 ); + return true; } private: - string _msg; + bool _justConnection; + string _ns; }; - void checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative = false ); - + bool checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative = false , int tryNumber = 1 ); + void resetShardVersion( DBClientBase * conn ); } diff -Nru mongodb-1.4.4/SConstruct mongodb-1.6.3/SConstruct --- mongodb-1.4.4/SConstruct 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/SConstruct 2010-09-24 10:02:42.000000000 -0700 @@ -1,6 +1,6 @@ # -*- mode: python; -*- -# build file for 10gen db -# this request scons +# build file for MongoDB +# this requires scons # you can get from http://www.scons.org # then just type scons @@ -49,7 +49,6 @@ metavar='DIR', help='additional piece for full dist name') - AddOption( "--64", dest="force64", type="string", @@ -90,23 +89,6 @@ help="fully static build") -AddOption('--java', - dest='javaHome', - type='string', - default="/opt/java/", - nargs=1, - action='store', - metavar='DIR', - help='java home') - -AddOption('--nojni', - dest='nojni', - type="string", - nargs=0, - action="store", - help="turn off jni support" ) - - AddOption('--usesm', dest='usesm', type="string", @@ -121,13 +103,6 @@ action="store", help="use v8 for javascript" ) -AddOption('--usejvm', - dest='usejvm', - type="string", - nargs=0, - action="store", - help="use java for javascript" ) - AddOption('--asio', dest='asio', type="string", @@ -175,14 +150,14 @@ type="string", nargs=1, action="store", - help="comma seperated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) + help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) AddOption( "--extrapathdyn", dest="extrapathdyn", type="string", nargs=1, action="store", - help="comma seperated list of add'l paths (--extrapath /opt/foo/,/foo) dynamic linking" ) + help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) dynamic linking" ) AddOption( "--extralib", @@ -190,21 +165,21 @@ type="string", nargs=1, action="store", - help="comma seperated list of libraries (--extralib js_static,readline" ) + help="comma separated list of libraries (--extralib js_static,readline" ) AddOption( "--staticlib", dest="staticlib", type="string", nargs=1, action="store", - help="comma seperated list of libs to link statically (--staticlib js_static,boost_program_options-mt,..." ) + help="comma separated list of libs to link statically (--staticlib js_static,boost_program_options-mt,..." 
) AddOption( "--staticlibpath", dest="staticlibpath", type="string", nargs=1, action="store", - help="comma seperated list of dirs to search for staticlib arguments" ) + help="comma separated list of dirs to search for staticlib arguments" ) AddOption( "--cxx", dest="cxx", @@ -228,6 +203,20 @@ action="store", help="boost version for linking(1_38)" ) +AddOption( "--cpppath", + dest="cpppath", + type="string", + nargs=1, + action="store", + help="Include path if you have headers in a nonstandard directory" ) + +AddOption( "--libpath", + dest="libpath", + type="string", + nargs=1, + action="store", + help="Library path if you have libraries in a nonstandard directory" ) + # # to use CPUPROFILE=/tmp/profile # to view pprof -gv mongod /tmp/profile @@ -251,9 +240,26 @@ AddOption("--sharedclient", dest="sharedclient", - action="store", + action="store_true", help="build a libmongoclient.so/.dll") +AddOption("--full", + dest="full", + action="store_true", + help="include client and headers when doing scons install") + +AddOption("--smokedbprefix", + dest="smokedbprefix", + action="store", + help="prefix to dbpath et al. for smoke tests") + +AddOption( "--pch", + dest="usePCH", + type="string", + nargs=0, + action="store", + help="use precompiled headers to speed up the build (experimental)" ) + # --- environment setup --- def removeIfInList( lst , thing ): @@ -271,12 +277,12 @@ onlyServer = len( COMMAND_LINE_TARGETS ) == 0 or ( len( COMMAND_LINE_TARGETS ) == 1 and str( COMMAND_LINE_TARGETS[0] ) in [ "mongod" , "mongos" , "test" ] ) nix = False -useJavaHome = False linux = False linux64 = False darwin = False windows = False freebsd = False +openbsd = False solaris = False force64 = not GetOption( "force64" ) is None if not force64 and os.getcwd().endswith( "mongo-64" ): @@ -293,14 +299,14 @@ debugBuild = ( not GetOption( "debugBuild" ) is None ) or ( not GetOption( "debugBuildAndLogging" ) is None ) debugLogging = not GetOption( "debugBuildAndLogging" ) is None noshell = not GetOption( "noshell" ) is None -nojni = not GetOption( "nojni" ) is None usesm = not GetOption( "usesm" ) is None usev8 = not GetOption( "usev8" ) is None -usejvm = not GetOption( "usejvm" ) is None asio = not GetOption( "asio" ) is None +usePCH = not GetOption( "usePCH" ) is None + justClientLib = (COMMAND_LINE_TARGETS == ['mongoclient']) env = Environment( MSVS_ARCH=msarch , tools = ["default", "gch"], toolpath = '.' ) @@ -309,9 +315,15 @@ env["CXX"] = GetOption( "cxx" ) env["LIBPATH"] = [] +if GetOption( "libpath" ) is not None: + env["LIBPATH"] = [GetOption( "libpath" )] + +if GetOption( "cpppath" ) is not None: + env["CPPPATH"] = [GetOption( "cpppath" )] + if GetOption( "recstore" ) != None: env.Append( CPPDEFINES=[ "_RECSTORE" ] ) -env.Append( CPPDEFINES=[ "_SCONS" ] ) +env.Append( CPPDEFINES=[ "_SCONS" , "MONGO_EXPOSE_MACROS" ] ) env.Append( CPPPATH=[ "." 
] ) @@ -330,13 +342,11 @@ else: boostVersion = "-" + boostVersion -if ( usesm and usejvm ): - print( "can't say usesm and usejvm at the same time" ) - Exit(1) - -if ( not ( usesm or usejvm or usev8 or justClientLib) ): +if ( not ( usesm or usev8 or justClientLib) ): usesm = True +distBuild = len( COMMAND_LINE_TARGETS ) == 1 and ( str( COMMAND_LINE_TARGETS[0] ) == "s3dist" or str( COMMAND_LINE_TARGETS[0] ) == "dist" ) + extraLibPlaces = [] def addExtraLibs( s ): @@ -357,15 +367,52 @@ for x in GetOption( "extralib" ).split( "," ): env.Append( LIBS=[ x ] ) +class InstallSetup: + binaries = False + clientSrc = False + headers = False + bannerDir = None + headerRoot = "include" + + def __init__(self): + self.default() + + def default(self): + self.binaries = True + self.libraries = False + self.clientSrc = False + self.headers = False + self.bannerDir = None + self.headerRoot = "include" + self.clientTestsDir = None + + def justClient(self): + self.binaries = False + self.libraries = False + self.clientSrc = True + self.headers = True + self.bannerDir = "distsrc/client/" + self.headerRoot = "" + self.clientTestsDir = "client/examples/" + +installSetup = InstallSetup() +if distBuild: + installSetup.bannerDir = "distsrc" + +if GetOption( "full" ): + installSetup.headers = True + installSetup.libraries = True + + # ------ SOURCE FILE SETUP ----------- -commonFiles = Split( "stdafx.cpp buildinfo.cpp db/common.cpp db/jsobj.cpp db/json.cpp db/lasterror.cpp db/nonce.cpp db/queryutil.cpp shell/mongo.cpp" ) -commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/sock.cpp" , "util/util.cpp" , "util/message.cpp" , - "util/assert_util.cpp" , "util/httpclient.cpp" , "util/md5main.cpp" , "util/base64.cpp", "util/debug_util.cpp", - "util/thread_pool.cpp" ] +commonFiles = Split( "pch.cpp buildinfo.cpp db/common.cpp db/jsobj.cpp db/json.cpp db/lasterror.cpp db/nonce.cpp db/queryutil.cpp shell/mongo.cpp" ) +commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/ramstore.cpp", "util/sock.cpp" , "util/util.cpp" , "util/message.cpp" , + "util/assert_util.cpp" , "util/log.cpp" , "util/httpclient.cpp" , "util/md5main.cpp" , "util/base64.cpp", "util/concurrency/vars.cpp", "util/concurrency/task.cpp", "util/debug_util.cpp", + "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp", + "util/histogram.cpp", "util/concurrency/spin_lock.cpp", "util/text.cpp" , "util/stringutils.cpp" , "util/processinfo.cpp" ] commonFiles += Glob( "util/*.c" ) -commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/model.cpp client/parallel.cpp client/syncclusterconnection.cpp" ) -commonFiles += [ "scripting/engine.cpp" , "scripting/utils.cpp" ] +commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/dbclientcursor.cpp client/model.cpp client/syncclusterconnection.cpp client/distlock.cpp s/shardconnection.cpp" ) #mmap stuff @@ -382,39 +429,47 @@ commonFiles += [ "util/processinfo_none.cpp" ] coreDbFiles = [ "db/commands.cpp" ] -coreServerFiles = [ "util/message_server_port.cpp" , "util/message_server_asio.cpp" ] +coreServerFiles = [ "util/message_server_port.cpp" , + "client/parallel.cpp" , + "util/miniwebserver.cpp" , "db/dbwebserver.cpp" , + "db/matcher.cpp" , "db/indexkey.cpp" , "db/dbcommands_generic.cpp" ] + +if GetOption( "asio" ) != None: + coreServerFiles += [ "util/message_server_asio.cpp" ] + +serverOnlyFiles = Split( "db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp 
db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/storage.cpp db/queryoptimizer.cpp db/extsort.cpp db/mr.cpp s/d_util.cpp db/cmdline.cpp" ) -serverOnlyFiles = Split( "db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/matcher.cpp db/dbeval.cpp db/dbwebserver.cpp db/dbhelpers.cpp db/instance.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/client.cpp db/security.cpp util/miniwebserver.cpp db/storage.cpp db/reccache.cpp db/queryoptimizer.cpp db/extsort.cpp db/mr.cpp s/d_util.cpp db/cmdline.cpp" ) -serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/index_*.cpp" ) +serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" ) -serverOnlyFiles += Glob( "db/dbcommands*.cpp" ) -serverOnlyFiles += Glob( "db/stats/*.cpp" ) +serverOnlyFiles += [ "db/dbcommands.cpp" , "db/dbcommands_admin.cpp" ] +coreServerFiles += Glob( "db/stats/*.cpp" ) serverOnlyFiles += [ "db/driverHelpers.cpp" ] +scriptingFiles = [ "scripting/engine.cpp" , "scripting/utils.cpp" ] + if usesm: - commonFiles += [ "scripting/engine_spidermonkey.cpp" ] - nojni = True + scriptingFiles += [ "scripting/engine_spidermonkey.cpp" ] elif usev8: - commonFiles += [ Glob( "scripting/*v8*.cpp" ) ] - nojni = True -elif not (nojni or justClientLib) : - commonFiles += [ "scripting/engine_java.cpp" ] + scriptingFiles += [ Glob( "scripting/*v8*.cpp" ) ] else: - commonFiles += [ "scripting/engine_none.cpp" ] - nojni = True + scriptingFiles += [ "scripting/engine_none.cpp" ] + +coreServerFiles += scriptingFiles -coreShardFiles = [] -shardServerFiles = coreShardFiles + Glob( "s/strategy*.cpp" ) + [ "s/commands_admin.cpp" , "s/commands_public.cpp" , "s/request.cpp" , "s/cursors.cpp" , "s/server.cpp" , "s/chunk.cpp" , "s/shardkey.cpp" , "s/config.cpp" , "s/s_only.cpp" , "db/cmdline.cpp" ] -serverOnlyFiles += coreShardFiles + [ "s/d_logic.cpp" ] +coreShardFiles = [ "s/config.cpp" , "s/grid.cpp" , "s/chunk.cpp" , "s/shard.cpp" , "s/shardkey.cpp" ] +shardServerFiles = coreShardFiles + Glob( "s/strategy*.cpp" ) + [ "s/commands_admin.cpp" , "s/commands_public.cpp" , "s/request.cpp" , "s/cursors.cpp" , "s/server.cpp" , "s/config_migrate.cpp" , "s/s_only.cpp" , "s/stats.cpp" , "s/balance.cpp" , "s/balancer_policy.cpp" , "db/cmdline.cpp" ] +serverOnlyFiles += coreShardFiles + [ "s/d_logic.cpp" , "s/d_writeback.cpp" , "s/d_migrate.cpp" , "s/d_state.cpp" , "s/d_split.cpp" , "client/distlock_test.cpp" ] serverOnlyFiles += [ "db/module.cpp" ] + Glob( "db/modules/*.cpp" ) modules = [] +moduleNames = [] for x in os.listdir( "db/modules/" ): if x.find( "." 
) >= 0: continue print( "adding module: " + x ) + moduleNames.append( x ) modRoot = "db/modules/" + x + "/" serverOnlyFiles += Glob( modRoot + "src/*.cpp" ) modBuildFile = modRoot + "build.py" @@ -423,8 +478,6 @@ allClientFiles = commonFiles + coreDbFiles + [ "client/clientOnly.cpp" , "client/gridfs.cpp" , "s/d_util.cpp" ]; -allCXXFiles = allClientFiles + coreShardFiles + shardServerFiles + serverOnlyFiles; - # ---- other build setup ----- platform = os.sys.platform @@ -438,23 +491,24 @@ if force64: processor = "x86_64" -DEFAULT_INSTALl_DIR = "/usr/local" -installDir = DEFAULT_INSTALl_DIR +DEFAULT_INSTALL_DIR = "/usr/local" +installDir = DEFAULT_INSTALL_DIR nixLibPrefix = "lib" distName = GetOption( "distname" ) dontReplacePackage = False -javaHome = GetOption( "javaHome" ) -javaVersion = "i386"; -javaLibs = [] - -distBuild = len( COMMAND_LINE_TARGETS ) == 1 and ( str( COMMAND_LINE_TARGETS[0] ) == "s3dist" or str( COMMAND_LINE_TARGETS[0] ) == "dist" ) if distBuild: release = True +def isDriverBuild(): + return GetOption( "prefix" ) and GetOption( "prefix" ).find( "mongo-cxx-driver" ) >= 0 + if GetOption( "prefix" ): installDir = GetOption( "prefix" ) + if isDriverBuild(): + installSetup.justClient() + def findVersion( root , choices ): if not isinstance(root, list): @@ -478,12 +532,6 @@ darwin = True platform = "osx" # prettier than darwin - if usejvm: - env.Append( CPPPATH=[ "-I/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Headers/" ] ) - - if not nojni: - env.Append( FRAMEWORKS=["JavaVM"] ) - if env["CXX"] is None: if os.path.exists( "/usr/bin/g++-4.2" ): env["CXX"] = "g++-4.2" @@ -493,7 +541,7 @@ if force64: env.Append( CPPPATH=["/usr/64/include"] ) env.Append( LIBPATH=["/usr/64/lib"] ) - if installDir == DEFAULT_INSTALl_DIR and not distBuild: + if installDir == DEFAULT_INSTALL_DIR and not distBuild: installDir = "/usr/64/" else: env.Append( CPPPATH=filterExists(["/sw/include" , "/opt/local/include"]) ) @@ -501,15 +549,10 @@ elif "linux2" == os.sys.platform: linux = True - useJavaHome = True - javaOS = "linux" platform = "linux" - javaHome = choosePathExist( [ javaHome , "/usr/lib/jvm/java/" , os.environ.get( "JAVA_HOME" ) ] , "/usr/lib/jvm/java/" ) - if os.uname()[4] == "x86_64" and not force32: linux64 = True - javaVersion = "amd64" nixLibPrefix = "lib64" env.Append( LIBPATH=["/usr/lib64" , "/lib64" ] ) env.Append( LIBS=["pthread"] ) @@ -527,10 +570,7 @@ elif "sunos5" == os.sys.platform: nix = True solaris = True - useJavaHome = True - javaHome = "/usr/lib/jvm/java-6-sun/" - javaOS = "solaris" - env.Append( CPPDEFINES=[ "__linux__" , "__sunos__" ] ) + env.Append( CPPDEFINES=[ "__sunos__" ] ) env.Append( LIBS=["socket","resolv"] ) elif os.sys.platform.startswith( "freebsd" ): @@ -540,13 +580,21 @@ env.Append( LIBPATH=[ "/usr/local/lib" ] ) env.Append( CPPDEFINES=[ "__freebsd__" ] ) +elif os.sys.platform.startswith( "openbsd" ): + nix = True + openbsd = True + env.Append( CPPPATH=[ "/usr/local/include" ] ) + env.Append( LIBPATH=[ "/usr/local/lib" ] ) + env.Append( CPPDEFINES=[ "__openbsd__" ] ) + elif "win32" == os.sys.platform: windows = True - if force64: - release = True + #if force64: + # release = True for pathdir in env['ENV']['PATH'].split(os.pathsep): if os.path.exists(os.path.join(pathdir, 'cl.exe')): + print( "found visual studio at " + pathdir ) break else: #use current environment @@ -554,75 +602,101 @@ def find_boost(): for x in ('', ' (x86)'): + boostDir = "C:/Program Files" + x + "/boost/latest" + if os.path.exists( boostDir ): + return boostDir for bv 
in reversed( range(33,50) ): for extra in ('', '_0', '_1'): boostDir = "C:/Program Files" + x + "/Boost/boost_1_" + str(bv) + extra if os.path.exists( boostDir ): return boostDir if os.path.exists( "C:/boost" ): - return "C:/boost" + return "C:/boost" if os.path.exists( "/boost" ): - return "/boost" + return "/boost" return None - boostDir = find_boost() if boostDir is None: print( "can't find boost" ) Exit(1) - - if force64 and os.path.exists( boostDir + "/lib/vs2010_64" ): - env.Append( LIBPATH=[ boostDir + "/lib/vs2010_64" ] ) - elif not force64 and os.path.exists( boostDir + "/lib/vs2010_32" ): - env.Append( LIBPATH=[ boostDir + "/lib/vs2010_32" ] ) else: - env.Append( LIBPATH=[ boostDir + "/Lib" ] ) - + print( "boost found at '" + boostDir + "'" ) serverOnlyFiles += [ "util/ntservice.cpp" ] boostLibs = [] - if usesm: - env.Append( CPPPATH=[ "js/src/" ] ) - env.Append(CPPPATH=["../js/src/"]) - env.Append(LIBPATH=["../js/src"]) - env.Append( CPPDEFINES=[ "OLDJS" ] ) - elif not justClientLib: - javaHome = findVersion( "C:/Program Files/java/" , - [ "jdk" , "jdk1.6.0_10" ] ) - env.Append( CPPPATH=[ javaHome + "/include" , javaHome + "/include/win32" ] ) - env.Append( LIBPATH=[ javaHome + "/Lib" ] ) - javaLibs += [ "jvm" ]; + env.Append(CPPPATH=[ "js/src/" ]) + env.Append(CPPPATH=["../js/src/"]) + env.Append(LIBPATH=["../js/src"]) + env.Append(LIBPATH=["../js/"]) + + env.Append( CPPDEFINES=[ "OLDJS" ] ) + env.Append( CPPDEFINES=[ "_UNICODE" ] ) + env.Append( CPPDEFINES=[ "UNICODE" ] ) winSDKHome = findVersion( [ "C:/Program Files/Microsoft SDKs/Windows/", "C:/Program Files (x86)/Microsoft SDKs/Windows/" ] , - [ "v6.0" , "v6.0a" , "v6.1", "v7.0A" ] ) + [ "v7.0A", "v7.0", "v6.1", "v6.0a", "v6.0" ] ) + print( "Windows SDK Root '" + winSDKHome + "'" ) env.Append( CPPPATH=[ boostDir , "pcre-7.4" , winSDKHome + "/Include" ] ) + # consider adding /MP build with multiple processes option. + + # /EHsc exception handling style for visual studio + # /W3 warning level env.Append( CPPFLAGS=" /EHsc /W3 " ) - env.Append( CPPFLAGS=" /wd4355 /wd4800 " ) #some warnings we don't like - env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","_UNICODE","UNICODE","SUPPORT_UCP","SUPPORT_UTF8,PSAPI_VERSION=1" ] ) - #env.Append( CPPFLAGS=' /Yu"stdafx.h" ' ) # this would be for pre-compiled headers, could play with it later + # some warnings we don't like: + env.Append( CPPFLAGS=" /wd4355 /wd4800 /wd4267 /wd4244 " ) + + env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","SUPPORT_UCP","SUPPORT_UTF8,PSAPI_VERSION=1" ] ) + + #env.Append( CPPFLAGS=' /Yu"pch.h" ' ) # this would be for pre-compiled headers, could play with it later + # docs say don't use /FD from command line + # /Gy function level linking + # /Gm is minimal rebuild, but may not work in parallel mode.
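The sequence of Append calls above works because SCons accumulates CPPFLAGS across calls and only expands the fragments into the final cl.exe command line at build time, so the warning suppressions layer cleanly on top of the baseline /EHsc /W3. A minimal standalone sketch of that accumulation, assuming only that SCons is importable as a library (the Environment here is a throwaway, not the env built in this SConstruct):

    # sketch: CPPFLAGS fragments appended in sequence, then expanded once
    from SCons.Environment import Environment

    e = Environment()
    e.Append( CPPFLAGS=" /EHsc /W3 " )        # baseline: exception model + warning level
    e.Append( CPPFLAGS=" /wd4355 /wd4800 " )  # suppressions ride along afterwards
    print( e.subst( "$CPPFLAGS" ) )           # roughly: /EHsc /W3 /wd4355 /wd4800
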
if release: env.Append( CPPDEFINES=[ "NDEBUG" ] ) - env.Append( CPPFLAGS= " /O2 /Oi /FD /MT /Gy /nologo /Zi /TP /errorReport:prompt /Gm " ) - #env.Append( CPPFLAGS= " /GL " ) # TODO: this has caused some linking problems + env.Append( CPPFLAGS= " /O2 /MT /Gy /Zi /TP /errorReport:none " ) + # TODO: this has caused some linking problems : + # /GL whole program optimization + # /LTCG link time code generation + env.Append( CPPFLAGS= " /GL " ) + env.Append( LINKFLAGS=" /LTCG " ) else: env.Append( CPPDEFINES=[ "_DEBUG" ] ) - env.Append( CPPFLAGS=" /Od /Gm /RTC1 /MDd /ZI " ) + # /Od disable optimization + # /ZI debug info w/edit & continue + # /TP it's a c++ file + # RTC1 /GZ (Enable Stack Frame Run-Time Error Checking) + env.Append( CPPFLAGS=" /Od /RTC1 /MDd /Z7 /TP /errorReport:none " ) env.Append( CPPFLAGS=' /Fd"mongod.pdb" ' ) - env.Append( LINKFLAGS=" /incremental:yes /debug " ) + env.Append( LINKFLAGS=" /debug " ) + + if os.path.exists("../readline/lib") : + env.Append( LIBPATH=["../readline/lib"] ) + env.Append( CPPPATH=["../readline/include"] ) + + if force64 and os.path.exists( boostDir + "/lib/vs2010_64" ): + env.Append( LIBPATH=[ boostDir + "/lib/vs2010_64" ] ) + elif not force64 and os.path.exists( boostDir + "/lib/vs2010_32" ): + env.Append( LIBPATH=[ boostDir + "/lib/vs2010_32" ] ) + else: + env.Append( LIBPATH=[ boostDir + "/Lib" ] ) - env.Append( LIBPATH=[ boostDir + "/Lib" ] ) if force64: env.Append( LIBPATH=[ winSDKHome + "/Lib/x64" ] ) - env.Append( LINKFLAGS=" /NODEFAULTLIB:MSVCPRT /NODEFAULTLIB:MSVCRT " ) else: env.Append( LIBPATH=[ winSDKHome + "/Lib" ] ) + if release: + #env.Append( LINKFLAGS=" /NODEFAULTLIB:MSVCPRT /NODEFAULTLIB:MSVCRTD " ) + env.Append( LINKFLAGS=" /NODEFAULTLIB:MSVCPRT " ) + else: + env.Append( LINKFLAGS=" /NODEFAULTLIB:MSVCPRT /NODEFAULTLIB:MSVCRT " ) def pcreFilter(x): name = x.name @@ -647,17 +721,21 @@ winLibString = "ws2_32.lib kernel32.lib advapi32.lib Psapi.lib" if force64: - winLibString += " LIBCMT LIBCPMT " + + winLibString += "" + #winLibString += " LIBCMT LIBCPMT " + else: winLibString += " user32.lib gdi32.lib winspool.lib comdlg32.lib shell32.lib ole32.lib oleaut32.lib " winLibString += " odbc32.lib odbccp32.lib uuid.lib " env.Append( LIBS=Split(winLibString) ) - if force64: - env.Append( CPPDEFINES=["_AMD64_=1"] ) - else: - env.Append( CPPDEFINES=["_X86_=1"] ) + # dm these should automatically be defined by the compiler. commenting out to see if it works.
jun2010 + #if force64: + # env.Append( CPPDEFINES=["_AMD64_=1"] ) + #else: + # env.Append( CPPDEFINES=["_X86_=1"] ) env.Append( CPPPATH=["../winpcap/Include"] ) env.Append( LIBPATH=["../winpcap/Lib"] ) @@ -665,24 +743,20 @@ else: print( "No special config for [" + os.sys.platform + "] which probably means it won't work" ) -if not nojni and useJavaHome: - env.Append( CPPPATH=[ javaHome + "include" , javaHome + "include/" + javaOS ] ) - env.Append( LIBPATH=[ javaHome + "jre/lib/" + javaVersion + "/server" , javaHome + "jre/lib/" + javaVersion ] ) - - if not nojni: - javaLibs += [ "java" , "jvm" ] - - env.Append( LINKFLAGS="-Xlinker -rpath -Xlinker " + javaHome + "jre/lib/" + javaVersion + "/server" ) - env.Append( LINKFLAGS="-Xlinker -rpath -Xlinker " + javaHome + "jre/lib/" + javaVersion ) - if nix: env.Append( CPPFLAGS="-fPIC -fno-strict-aliasing -ggdb -pthread -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch" ) + if linux: + env.Append( CPPFLAGS=" -Werror " ) env.Append( CXXFLAGS=" -Wnon-virtual-dtor " ) env.Append( LINKFLAGS=" -fPIC -pthread -rdynamic" ) env.Append( LIBS=[] ) + if linux and GetOption( "sharedclient" ): + env.Append( LINKFLAGS=" -Wl,--as-needed -Wl,-zdefs " ) + if debugBuild: env.Append( CPPFLAGS=" -O0 -fstack-protector " ); + env['ENV']['GLIBCXX_FORCE_NEW'] = 1; # play nice with valgrind else: env.Append( CPPFLAGS=" -O3" ) @@ -706,11 +780,12 @@ env.Append( CPPDEFINES=["USE_GDBSERVER"] ) # pre-compiled headers - if False and 'Gch' in dir( env ): + if usePCH and 'Gch' in dir( env ): print( "using precompiled headers" ) - env['Gch'] = env.Gch( [ "stdafx.h" ] )[0] - #Depends( "stdafx.o" , "stdafx.h.gch" ) - #SideEffect( "dummyGCHSideEffect" , "stdafx.h.gch" ) + env['Gch'] = env.Gch( [ "pch.h" ] )[0] + elif os.path.exists('pch.h.gch'): + print( "removing precompiled headers" ) + os.unlink('pch.h.gch') # gcc uses the file if it exists if usev8: env.Append( CPPPATH=["../v8/include/"] ) @@ -729,46 +804,6 @@ # --- check system --- -def getGitBranch(): - if not os.path.exists( ".git" ): - return None - - version = open( ".git/HEAD" ,'r' ).read().strip() - if not version.startswith( "ref: " ): - return version - version = version.split( "/" ) - version = version[len(version)-1] - return version - -def getGitBranchString( prefix="" , postfix="" ): - t = re.compile( '[/\\\]' ).split( os.getcwd() ) - if len(t) > 2 and t[len(t)-1] == "mongo": - par = t[len(t)-2] - m = re.compile( ".*_([vV]\d+\.\d+)$" ).match( par ) - if m is not None: - return prefix + m.group(1).lower() + postfix - if par.find("Nightly") > 0: - return "" - - - b = getGitBranch() - if b == None or b == "master": - return "" - return prefix + b + postfix - -def getGitVersion(): - if not os.path.exists( ".git" ): - return "nogitversion" - - version = open( ".git/HEAD" ,'r' ).read().strip() - if not version.startswith( "ref: " ): - return version - version = version[5:] - f = ".git/" + version - if not os.path.exists( f ): - return version - return open( f , 'r' ).read().strip() - def getSysInfo(): if windows: return "windows " + str( sys.getwindowsversion() ) @@ -781,14 +816,16 @@ return target def setupBuildInfoFile( outFile ): - version = getGitVersion() + version = utils.getGitVersion() + if len(moduleNames) > 0: + version = version + " modules: " + ','.join( moduleNames ) sysInfo = getSysInfo() contents = '\n'.join([ - '#include "stdafx.h"', + '#include "pch.h"', '#include ', '#include ', 'namespace mongo { const char * gitVersion(){ return "' + version + '"; } }', - 'namespace mongo { const char * sysInfo(){ 
return "' + sysInfo + ' BOOST_LIB_VERSION=" BOOST_LIB_VERSION ; } }', + 'namespace mongo { string sysInfo(){ return "' + sysInfo + ' BOOST_LIB_VERSION=" BOOST_LIB_VERSION ; } }', ]) contents += '\n'; @@ -811,7 +848,7 @@ return s -def doConfigure( myenv , needJava=True , needPcre=True , shell=False ): +def doConfigure( myenv , needPcre=True , shell=False ): conf = Configure(myenv) myenv["LINKFLAGS_CLEAN"] = list( myenv["LINKFLAGS"] ) myenv["LIBS_CLEAN"] = list( myenv["LIBS"] ) @@ -826,7 +863,7 @@ print( "can't find stdc++ library which is needed" ); Exit(1) - def myCheckLib( poss , failIfNotFound=False , java=False , staticOnly=False): + def myCheckLib( poss , failIfNotFound=False , staticOnly=False): if type( poss ) != types.ListType : poss = [poss] @@ -847,7 +884,7 @@ return True - if release and not java and not windows and failIfNotFound: + if release and not windows and failIfNotFound: print( "ERROR: can't find static version of: " + str( poss ) + " in: " + str( allPlaces ) ) Exit(1) @@ -856,7 +893,10 @@ return True if failIfNotFound: - print( "can't find library " + str( poss ) + " in " + str( myenv["LIBPATH"] ) ) + print( "can't find or link against library " + str( poss ) + " in " + str( myenv["LIBPATH"] ) ) + print( "see config.log for more information" ) + if windows: + print( "use scons --64 when cl.exe is 64 bit compiler" ) Exit(1) return False @@ -878,7 +918,7 @@ else: print( "WARNING: old version of boost - you should consider upgrading" ) - # this will add it iff it exists and works + # this will add it if it exists and works myCheckLib( [ "boost_system" + boostCompiler + "-mt" + boostVersion , "boost_system" + boostCompiler + boostVersion ] ) @@ -891,10 +931,6 @@ if not conf.CheckCXXHeader( "execinfo.h" ): myenv.Append( CPPDEFINES=[ "NOEXECINFO" ] ) - if needJava: - for j in javaLibs: - myCheckLib( j , True , True ) - if nix and needPcre: myCheckLib( "pcrecpp" , True ) myCheckLib( "pcre" , True ) @@ -917,7 +953,24 @@ if usesm: - myCheckLib( [ "mozjs" , "js", "js_static" ] , True ) + # see http://www.mongodb.org/pages/viewpageattachments.action?pageId=12157032 + J = [ "mozjs" , "js", "js_static" ] + if windows: + if msarch == "amd64": + if release: + J = [ "js64r", "js", "mozjs" , "js_static" ] + else: + J = "js64d" + print( "looking for js64d.lib for spidermonkey. 
(available at mongodb.org prebuilt)" ); + else: + if not force32: + print( "Assuming a 32 bit build is desired" ) + if release: + J = [ "js32r", "js", "mozjs" , "js_static" ] + else: + J = [ "js32d", "js", "mozjs" , "js_static" ] + + myCheckLib( J , True ) mozHeader = "js" if bigLibString(myenv).find( "mozjs" ) >= 0: mozHeader = "mozjs" @@ -944,18 +997,22 @@ myCheckLib( "ncurses" , True ) else: myenv.Append( LINKFLAGS=" /usr/lib/libreadline.dylib " ) + elif openbsd: + myenv.Append( CPPDEFINES=[ "USE_READLINE" ] ) + myCheckLib( "termcap" , True ) + myCheckLib( "readline" , True ) elif myCheckLib( "readline" , release and nix , staticOnly=release ): myenv.Append( CPPDEFINES=[ "USE_READLINE" ] ) myCheckLib( "ncurses" , staticOnly=release ) myCheckLib( "tinfo" , staticOnly=release ) else: - print( "warning: no readline, shell will be a bit ugly" ) + print( "\n*** notice: no readline library, mongo shell will not have nice interactive line editing ***\n" ) if linux: myCheckLib( "rt" , True ) # requires ports devel/libexecinfo to be installed - if freebsd: + if freebsd or openbsd: myCheckLib( "execinfo", True ) env.Append( LIBS=[ "execinfo" ] ) @@ -1005,7 +1062,13 @@ for s in source: f = open( str(s) , 'r' ) for l in f: - l = l.split("//")[0].strip() + + #strip comments. special case if // is potentially in a string + parts = l.split("//", 1) + if (len(parts) > 1) and ('"' not in parts[1]) and ("'" not in parts[1]): + l = parts[0] + + l = l.strip() if len ( l ) == 0: continue @@ -1047,7 +1110,6 @@ for l in open( str(source[0]) , 'r' ): l = l.strip() - l = l.split( "//" )[0] l = l.replace( '\\' , "\\\\" ) l = l.replace( '"' , "\\\"" ) @@ -1058,6 +1120,13 @@ out = open( outFile , 'w' ) out.write( h ) + out.close() + + # mongo_vstudio.cpp is in git as the .vcproj doesn't generate this file.
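The quote guard added to the line scanner a few hunks above is what keeps a // inside a string literal from being treated as a comment: the tail after the split is only discarded when neither quote character appears in it, which is exactly the URL-in-a-log-line case that the old split("//")[0] got wrong. A self-contained sketch of the same heuristic, using a hypothetical helper name that is not in the tree:

    # sketch of the comment-stripping rule used by the scanner above
    def strip_line_comment( l ):
        parts = l.split( "//", 1 )
        # only treat // as a comment when no quote follows it
        if len( parts ) > 1 and '"' not in parts[1] and "'" not in parts[1]:
            return parts[0].strip()
        return l.strip()

    strip_line_comment( 'uassert( 10062 , "not code" , 0 ); // unreachable' )  # tail stripped
    strip_line_comment( 'log() << "see http://www.mongodb.org" << endl;' )     # URL left intact

The rule is deliberately conservative in the other direction too: a comment that itself contains a quote is kept whole, which is harmless for the scanning being done here.
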
+ if outFile.find( "mongo.cpp" ) >= 0: + out = open( outFile.replace( "mongo" , "mongo_vstudio" ) , 'w' ) + out.write( h ) + out.close() return None @@ -1074,6 +1143,7 @@ clientEnv.Append( CPPPATH=["../"] ) clientEnv.Prepend( LIBS=[ "mongoclient"] ) clientEnv.Prepend( LIBPATH=["."] ) +#clientEnv["CPPDEFINES"].remove( "MONGO_EXPOSE_MACROS" ) l = clientEnv[ "LIBS" ] removeIfInList( l , "pcre" ) removeIfInList( l , "pcrecpp" ) @@ -1083,7 +1153,6 @@ testEnv.Prepend( LIBS=[ "mongotestfiles" ] ) testEnv.Prepend( LIBPATH=["."] ) - # ----- TARGETS ------ def checkErrorCodes(): @@ -1095,16 +1164,18 @@ checkErrorCodes() # main db target -mongod = env.Program( "mongod" , commonFiles + coreDbFiles + serverOnlyFiles + [ "db/db.cpp" ] ) +mongod = env.Program( "mongod" , commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + [ "db/db.cpp" ] ) Default( mongod ) # tools -allToolFiles = commonFiles + coreDbFiles + serverOnlyFiles + [ "client/gridfs.cpp", "tools/tool.cpp" ] +allToolFiles = commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + [ "client/gridfs.cpp", "tools/tool.cpp" ] normalTools = [ "dump" , "restore" , "export" , "import" , "files" , "stat" ] -env.Alias( "tools" , [ "mongo" + x for x in normalTools ] ) +env.Alias( "tools" , [ add_exe( "mongo" + x ) for x in normalTools ] ) for x in normalTools: env.Program( "mongo" + x , allToolFiles + [ "tools/" + x + ".cpp" ] ) +#some special tools +env.Program( "bsondump" , allToolFiles + [ "tools/bsondump.cpp" ] ) env.Program( "mongobridge" , allToolFiles + [ "tools/bridge.cpp" ] ) # mongos @@ -1114,7 +1185,8 @@ clientLibName = str( env.Library( "mongoclient" , allClientFiles )[0] ) if GetOption( "sharedclient" ): sharedClientLibName = str( env.SharedLibrary( "mongoclient" , allClientFiles )[0] ) -env.Library( "mongotestfiles" , commonFiles + coreDbFiles + serverOnlyFiles + ["client/gridfs.cpp"]) +env.Library( "mongotestfiles" , commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + ["client/gridfs.cpp"]) +env.Library( "mongoshellfiles" , allClientFiles + coreServerFiles ) clientTests = [] @@ -1124,9 +1196,12 @@ clientTests += [ clientEnv.Program( "whereExample" , [ "client/examples/whereExample.cpp" ] ) ] clientTests += [ clientEnv.Program( "authTest" , [ "client/examples/authTest.cpp" ] ) ] clientTests += [ clientEnv.Program( "httpClientTest" , [ "client/examples/httpClientTest.cpp" ] ) ] +# clientTests += [ clientEnv.Program( "bsondemo" , [ "bson/bsondemo/bsondemo.cpp" ] ) ] #TODO # testing test = testEnv.Program( "test" , Glob( "dbtests/*.cpp" ) ) +if windows: + testEnv.Alias( "test" , "test.exe" ) perftest = testEnv.Program( "perftest", [ "dbtests/framework.cpp" , "dbtests/perf/perftest.cpp" ] ) clientTests += [ clientEnv.Program( "clientTest" , [ "client/examples/clientTest.cpp" ] ) ] @@ -1149,7 +1224,6 @@ env.JSConcat( "shell/mongo-server.jsall" , [ "shell/servers.js"] ) env.JSHeader( "shell/mongo-server.jsall" ) - shellEnv = env.Clone(); if release and ( ( darwin and force64 ) or linux64 ): @@ -1169,12 +1243,9 @@ shellEnv["CPPPATH"].remove( "/usr/64/include" ) shellEnv["LIBPATH"].remove( "/usr/64/lib" ) shellEnv.Append( CPPPATH=filterExists(["/sw/include" , "/opt/local/include"]) ) - shellEnv.Append( LIBPATH=filterExists(["/sw/lib/", "/opt/local/lib" , "/usr/lib"]) ) + shellEnv.Append( LIBPATH=filterExists(["/sw/lib/", "/opt/local/lib" , "/usr/lib", "/usr/local/lib" ]) ) l = shellEnv["LIBS"] - if linux64: - removeIfInList( l , "java" ) - removeIfInList( l , "jvm" ) removeIfInList( l , "pcre" ) removeIfInList( l , 
"pcrecpp" ) @@ -1182,22 +1253,25 @@ if windows: shellEnv.Append( LIBS=["winmm.lib"] ) - coreShellFiles = [ "shell/dbshell.cpp" , "shell/utils.cpp" , "shell/mongo-server.cpp" ] + coreShellFiles = [ "shell/dbshell.cpp" , "shell/shell_utils.cpp" , "shell/mongo-server.cpp" ] if weird: shell32BitFiles = coreShellFiles for f in allClientFiles: shell32BitFiles.append( "32bit/" + str( f ) ) + for f in scriptingFiles: + shell32BitFiles.append( "32bit/" + str( f ) ) shellEnv.VariantDir( "32bit" , "." ) + shellEnv.Append( CPPPATH=["32bit/"] ) else: shellEnv.Prepend( LIBPATH=[ "." ] ) - shellEnv = doConfigure( shellEnv , needPcre=False , needJava=False , shell=True ) + shellEnv = doConfigure( shellEnv , needPcre=False , shell=True ) if weird: mongo = shellEnv.Program( "mongo" , shell32BitFiles ) else: - shellEnv.Prepend( LIBS=[ "mongoclient"] ) + shellEnv.Prepend( LIBS=[ "mongoshellfiles"] ) mongo = shellEnv.Program( "mongo" , coreShellFiles ) if weird: @@ -1207,185 +1281,85 @@ # ---- RUNNING TESTS ---- -testEnv.Alias( "dummySmokeSideEffect", [], [] ) - -def addSmoketest( name, deps, actions ): - if type( actions ) == type( list() ): - actions = [ testSetup ] + actions - else: - actions = [ testSetup, actions ] - testEnv.Alias( name, deps, actions ) - testEnv.AlwaysBuild( name ) +smokeEnv = testEnv.Clone() +smokeEnv['ENV']['PATH']=os.environ['PATH'] +smokeEnv.Alias( "dummySmokeSideEffect", [], [] ) + +smokeFlags = [] + +# Ugh. Frobbing the smokeFlags must precede using them to construct +# actions, I think. +if GetOption( 'smokedbprefix') is not None: + smokeFlags += ['--smoke-db-prefix', GetOption( 'smokedbprefix')] + +if 'startMongodSmallOplog' in COMMAND_LINE_TARGETS: + smokeFlags += ["--small-oplog"] + +def addTest(name, deps, actions): + smokeEnv.Alias( name, deps, actions ) + smokeEnv.AlwaysBuild( name ) # Prevent smoke tests from running in parallel - testEnv.SideEffect( "dummySmokeSideEffect", name ) - -def ensureDir( name ): - d = os.path.dirname( name ) - if not os.path.exists( d ): - print( "Creating dir: " + name ); - os.makedirs( d ) - if not os.path.exists( d ): - print( "Failed to create dir: " + name ); - Exit( 1 ) - -def ensureTestDirs(): - ensureDir( "/tmp/unittest/" ) - ensureDir( "/data/" ) - ensureDir( "/data/db/" ) - -def testSetup( env , target , source ): - ensureTestDirs() - -if len( COMMAND_LINE_TARGETS ) == 1 and str( COMMAND_LINE_TARGETS[0] ) == "test": - ensureDir( "/tmp/unittest/" ); - -addSmoketest( "smoke", [ add_exe( "test" ) ] , [ test[ 0 ].abspath ] ) -addSmoketest( "smokePerf", [ "perftest" ] , [ perftest[ 0 ].abspath ] ) - -clientExec = [ x[0].abspath for x in clientTests ] -def runClientTests( env, target, source ): - global clientExec - global mongodForTestsPort - import subprocess - for i in clientExec: - if subprocess.call( [ i, "--port", mongodForTestsPort ] ) != 0: - return True - if subprocess.Popen( [ mongod[0].abspath, "msg", "ping", mongodForTestsPort ], stdout=subprocess.PIPE ).communicate()[ 0 ].count( "****ok" ) == 0: - return True - if subprocess.call( [ mongod[0].abspath, "msg", "ping", mongodForTestsPort ] ) != 0: - return True - return False -addSmoketest( "smokeClient" , clientExec, runClientTests ) -addSmoketest( "mongosTest" , [ mongos[0].abspath ] , [ mongos[0].abspath + " --test" ] ) + smokeEnv.SideEffect( "dummySmokeSideEffect", name ) -def jsSpec( suffix ): - import os.path - args = [ os.path.dirname( mongo[0].abspath ), "jstests" ] + suffix - return apply( os.path.join, args ) - -def jsDirTestSpec( dir ): - path = jsSpec( [ dir + '/*.js' ] 
) - paths = [x.abspath for x in Glob( path ) ] - return mongo[0].abspath + " --nodb " + ' '.join( paths ) +def addSmoketest( name, deps ): + addTest(name, deps, [ "python buildscripts/smoke.py " + " ".join(smokeFlags) + ' ' + name ]) -def runShellTest( env, target, source ): - global mongodForTestsPort - import subprocess - target = str( target[0] ) - if target == "smokeJs": - spec = [ jsSpec( [ "_runner.js" ] ) ] - elif target == "smokeQuota": - g = Glob( jsSpec( [ "quota/*.js" ] ) ) - spec = [ x.abspath for x in g ] - elif target == "smokeJsPerf": - g = Glob( jsSpec( [ "perf/*.js" ] ) ) - spec = [ x.abspath for x in g ] - elif target == "smokeJsSlow": - spec = [x.abspath for x in Glob(jsSpec(["slow/*"]))] - elif target == "smokeParallel": - spec = [x.abspath for x in Glob(jsSpec(["parallel/*"]))] - else: - print( "invalid target for runShellTest()" ) - Exit( 1 ) - return subprocess.call( [ mongo[0].abspath, "--port", mongodForTestsPort ] + spec ) +addSmoketest( "smoke", [ add_exe( "test" ) ] ) +addSmoketest( "smokePerf", [ "perftest" ] ) +addSmoketest( "smokeClient" , clientTests ) +addSmoketest( "mongosTest" , [ mongos[0].abspath ] ) # These tests require the mongo shell if not onlyServer and not noshell: - addSmoketest( "smokeJs", [add_exe("mongo")], runShellTest ) - addSmoketest( "smokeClone", [ "mongo", "mongod" ], [ jsDirTestSpec( "clone" ) ] ) - addSmoketest( "smokeRepl", [ "mongo", "mongod", "mongobridge" ], [ jsDirTestSpec( "repl" ) ] ) - addSmoketest( "smokeDisk", [ add_exe( "mongo" ), add_exe( "mongod" ) ], [ jsDirTestSpec( "disk" ) ] ) - addSmoketest( "smokeAuth", [ add_exe( "mongo" ), add_exe( "mongod" ) ], [ jsDirTestSpec( "auth" ) ] ) - addSmoketest( "smokeParallel", [ add_exe( "mongo" ), add_exe( "mongod" ) ], runShellTest ) - addSmoketest( "smokeSharding", [ "mongo", "mongod", "mongos" ], [ jsDirTestSpec( "sharding" ) ] ) - addSmoketest( "smokeJsPerf", [ "mongo" ], runShellTest ) - addSmoketest("smokeJsSlow", [add_exe("mongo")], runShellTest) - addSmoketest( "smokeQuota", [ "mongo" ], runShellTest ) - addSmoketest( "smokeTool", [ add_exe( "mongo" ) ], [ jsDirTestSpec( "tool" ) ] ) - -mongodForTests = None -mongodForTestsPort = "27017" - -def startMongodWithArgs(*args): - global mongodForTests - global mongodForTestsPort - global mongod - if mongodForTests: - return - mongodForTestsPort = "32000" - import os - ensureTestDirs() - dirName = "/data/db/sconsTests/" - ensureDir( dirName ) - from subprocess import Popen - mongodForTests = Popen([mongod[0].abspath, "--port", mongodForTestsPort, - "--dbpath", dirName] + list(args)) - - if not utils.didMongodStart( 32000 ): - print( "Failed to start mongod" ) - mongodForTests = None - Exit( 1 ) - -def startMongodForTests( env, target, source ): - return startMongodWithArgs() - -def startMongodSmallOplog(env, target, source): - return startMongodWithArgs("--master", "--oplogSize", "10") - -def stopMongodForTests(): - global mongodForTests - if not mongodForTests: - return - if mongodForTests.poll() is not None: - print( "Failed to start mongod" ) - mongodForTests = None - Exit( 1 ) - try: - # This function not available in Python 2.5 - mongodForTests.terminate() - except AttributeError: - if windows: - import win32process - win32process.TerminateProcess(mongodForTests._handle, -1) - else: - from os import kill - kill( mongodForTests.pid, 15 ) - mongodForTests.wait() - -testEnv.Alias( "startMongod", [add_exe("mongod")], [startMongodForTests] ); -testEnv.AlwaysBuild( "startMongod" ); -testEnv.SideEffect( "dummySmokeSideEffect", 
"startMongod" ) - -testEnv.Alias( "startMongodSmallOplog", [add_exe("mongod")], [startMongodSmallOplog] ); -testEnv.AlwaysBuild( "startMongodSmallOplog" ); -testEnv.SideEffect( "dummySmokeSideEffect", "startMongodSmallOplog" ) + addSmoketest( "smokeJs", [add_exe("mongo")] ) + addSmoketest( "smokeClone", [ "mongo", "mongod" ] ) + addSmoketest( "smokeRepl", [ "mongo", "mongod", "mongobridge" ] ) + addSmoketest( "smokeReplSets", [ "mongo", "mongod", "mongobridge" ] ) + addSmoketest( "smokeDisk", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) + addSmoketest( "smokeAuth", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) + addSmoketest( "smokeParallel", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) + addSmoketest( "smokeSharding", [ "mongo", "mongod", "mongos" ] ) + addSmoketest( "smokeJsPerf", [ "mongo" ] ) + addSmoketest("smokeJsSlowNightly", [add_exe("mongo")]) + addSmoketest("smokeJsSlowWeekly", [add_exe("mongo")]) + addSmoketest( "smokeQuota", [ "mongo" ] ) + addSmoketest( "smokeTool", [ add_exe( "mongo" ) ] ) + +# Note: although the test running logic has been moved to +# buildscripts/smoke.py, the interface to running the tests has been +# something like 'scons startMongod '; startMongod is now a +# no-op, and should go away eventually. +smokeEnv.Alias( "startMongod", [add_exe("mongod")]); +smokeEnv.AlwaysBuild( "startMongod" ); +smokeEnv.SideEffect( "dummySmokeSideEffect", "startMongod" ) + +smokeEnv.Alias( "startMongodSmallOplog", [add_exe("mongod")], [] ); +smokeEnv.AlwaysBuild( "startMongodSmallOplog" ); +smokeEnv.SideEffect( "dummySmokeSideEffect", "startMongodSmallOplog" ) def addMongodReqTargets( env, target, source ): - mongodReqTargets = [ "smokeClient", "smokeJs", "smokeQuota" ] + mongodReqTargets = [ "smokeClient", "smokeJs" ] for target in mongodReqTargets: - testEnv.Depends( target, "startMongod" ) - testEnv.Depends( "smokeAll", target ) + smokeEnv.Depends( target, "startMongod" ) + smokeEnv.Depends( "smokeAll", target ) -testEnv.Alias( "addMongodReqTargets", [], [addMongodReqTargets] ) -testEnv.AlwaysBuild( "addMongodReqTargets" ) +smokeEnv.Alias( "addMongodReqTargets", [], [addMongodReqTargets] ) +smokeEnv.AlwaysBuild( "addMongodReqTargets" ) -testEnv.Alias( "smokeAll", [ "smoke", "mongosTest", "smokeClone", "smokeRepl", "addMongodReqTargets", "smokeDisk", "smokeAuth", "smokeSharding", "smokeTool" ] ) -testEnv.AlwaysBuild( "smokeAll" ) +smokeEnv.Alias( "smokeAll", [ "smoke", "mongosTest", "smokeClone", "smokeRepl", "addMongodReqTargets", "smokeDisk", "smokeAuth", "smokeSharding", "smokeTool" ] ) +smokeEnv.AlwaysBuild( "smokeAll" ) def addMongodReqNoJsTargets( env, target, source ): mongodReqTargets = [ "smokeClient" ] for target in mongodReqTargets: - testEnv.Depends( target, "startMongod" ) - testEnv.Depends( "smokeAllNoJs", target ) - -testEnv.Alias( "addMongodReqNoJsTargets", [], [addMongodReqNoJsTargets] ) -testEnv.AlwaysBuild( "addMongodReqNoJsTargets" ) + smokeEnv.Depends( target, "startMongod" ) + smokeEnv.Depends( "smokeAllNoJs", target ) -testEnv.Alias( "smokeAllNoJs", [ "smoke", "mongosTest", "addMongodReqNoJsTargets" ] ) -testEnv.AlwaysBuild( "smokeAllNoJs" ) +smokeEnv.Alias( "addMongodReqNoJsTargets", [], [addMongodReqNoJsTargets] ) +smokeEnv.AlwaysBuild( "addMongodReqNoJsTargets" ) -import atexit -atexit.register( stopMongodForTests ) +smokeEnv.Alias( "smokeAllNoJs", [ "smoke", "mongosTest", "addMongodReqNoJsTargets" ] ) +smokeEnv.AlwaysBuild( "smokeAllNoJs" ) def recordPerformance( env, target, source ): from buildscripts import benchmark_tools @@ -1406,7 +1380,7 
@@ sub = { "benchmark": { "project": "http://github.com/mongodb/mongo", "description": "" }, "trial": {} } sub[ "benchmark" ][ "name" ] = name sub[ "benchmark" ][ "tags" ] = [ "c++", re.match( "(.*)__", name ).group( 1 ) ] - sub[ "trial" ][ "server_hash" ] = getGitVersion() + sub[ "trial" ][ "server_hash" ] = utils.getGitVersion() sub[ "trial" ][ "client_hash" ] = "" sub[ "trial" ][ "result" ] = val try: @@ -1416,7 +1390,7 @@ print( sys.exc_info() ) return False -addSmoketest( "recordPerf", [ "perftest" ] , [ recordPerformance ] ) +addTest( "recordPerf", [ "perftest" ] , [ recordPerformance ] ) def run_shell_tests(env, target, source): from buildscripts import test_shell @@ -1426,14 +1400,27 @@ env.Alias("test_shell", [], [run_shell_tests]) env.AlwaysBuild("test_shell") +# ---- Docs ---- +def build_docs(env, target, source): + from buildscripts import docs + docs.main() + +env.Alias("docs", [], [build_docs]) +env.AlwaysBuild("docs") + # ---- INSTALL ------- def getSystemInstallName(): n = platform + "-" + processor if static: n += "-static" + if GetOption("nostrip"): + n += "-debugsymbols" if nix and os.uname()[2].startswith( "8." ): n += "-tiger" + + if len(moduleNames) > 0: + n += "-" + "-".join( moduleNames ) try: import settings @@ -1450,13 +1437,16 @@ return n def getCodeVersion(): - fullSource = open( "stdafx.cpp" , "r" ).read() + fullSource = open( "util/version.cpp" , "r" ).read() allMatches = re.findall( r"versionString.. = \"(.*?)\"" , fullSource ); if len(allMatches) != 1: print( "can't find version # in code" ) return None return allMatches[0] +if getCodeVersion() == None: + Exit(-1) + def getDistName( sofar ): global distName global dontReplacePackage @@ -1473,15 +1463,18 @@ return version - return getGitBranchString( "" , "-" ) + today.strftime( "%Y-%m-%d" ) + return utils.getGitBranchString( "" , "-" ) + today.strftime( "%Y-%m-%d" ) if distBuild: - from datetime import date - today = date.today() - installDir = "mongodb-" + getSystemInstallName() + "-" - installDir += getDistName( installDir ) - print "going to make dist: " + installDir + if isDriverBuild(): + installDir = GetOption( "prefix" ) + else: + from datetime import date + today = date.today() + installDir = "mongodb-" + getSystemInstallName() + "-" + installDir += getDistName( installDir ) + print "going to make dist: " + installDir # binaries @@ -1496,6 +1489,9 @@ allBinaries = [] def installBinary( e , name ): + if not installSetup.binaries: + return + global allBinaries if windows: @@ -1518,6 +1514,7 @@ for x in normalTools: installBinary( env , "mongo" + x ) +installBinary( env , "bsondump" ) if mongosniff_built: installBinary(env, "mongosniff") @@ -1529,36 +1526,50 @@ installBinary( env , "mongo" ) env.Alias( "all" , allBinaries ) - - -# NOTE: In some cases scons gets confused between installation targets and build -# dependencies. Here, we use InstallAs instead of Install to prevent such confusion -# on a case-by-case basis. 
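From here on, every install step consults the InstallSetup flags assembled earlier, which is how one SConstruct now serves three install shapes: the default server layout (binaries only), scons --full install (binaries plus headers and libraries), and the C++ driver drop that isDriverBuild() switches to with justClient(). A condensed sketch of that gating, reusing the real flag names from this SConstruct but with a made-up pair of install calls (the actual target lists appear in the hunks that follow):

    # sketch: each install phase below is a no-op unless its flag is set
    setup = InstallSetup()                 # default(): binaries only
    if isDriverBuild():
        setup.justClient()                 # client source + headers, no binaries
    if GetOption( "full" ):
        setup.headers = True
        setup.libraries = True

    if setup.binaries:
        installBinary( env , "mongod" )    # illustrative call site only
    if setup.headers:
        env.Install( installDir + "/" + setup.headerRoot + "/mongo" , Glob( "*.h" ) )
    if setup.libraries:
        env.Install( installDir + "/" + nixLibPrefix , clientLibName )
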
+env.Alias( "core" , [ add_exe( "mongo" ) , add_exe( "mongod" ) , add_exe( "mongos" ) ] ) #headers -for id in [ "", "util/", "db/" , "client/" ]: - env.Install( installDir + "/include/mongo/" + id , Glob( id + "*.h" ) ) +if installSetup.headers: + for id in [ "", "util/", "util/mongoutils/", "util/concurrency/", "db/" , "db/stats/" , "db/repl/" , "client/" , "bson/", "bson/util/" , "s/" , "scripting/" ]: + env.Install( installDir + "/" + installSetup.headerRoot + "/mongo/" + id , Glob( id + "*.h" ) ) + env.Install( installDir + "/" + installSetup.headerRoot + "/mongo/" + id , Glob( id + "*.hpp" ) ) + +if installSetup.clientSrc: + for x in allClientFiles: + x = str(x) + env.Install( installDir + "/mongo/" + x.rpartition( "/" )[0] , x ) #lib -env.Install( installDir + "/" + nixLibPrefix, clientLibName ) -if usejvm: - env.Install( installDir + "/" + nixLibPrefix + "/mongo/jars" , Glob( "jars/*" ) ) +if installSetup.libraries: + env.Install( installDir + "/" + nixLibPrefix, clientLibName ) + if GetOption( "sharedclient" ): + env.Install( installDir + "/" + nixLibPrefix, sharedClientLibName ) + #textfiles -if distBuild or release: - #don't want to install these /usr/local/ for example - env.Install( installDir , "distsrc/README" ) - env.Install( installDir , "distsrc/THIRD-PARTY-NOTICES" ) - env.Install( installDir , "distsrc/GNU-AGPL-3.0" ) +if installSetup.bannerDir: + for x in os.listdir( installSetup.bannerDir ): + full = installSetup.bannerDir + "/" + x + if os.path.isdir( full ): + continue + if x.find( "~" ) >= 0: + continue + env.Install( installDir , full ) + +if installSetup.clientTestsDir: + for x in os.listdir( installSetup.clientTestsDir ): + full = installSetup.clientTestsDir + "/" + x + if os.path.isdir( full ): + continue + if x.find( "~" ) >= 0: + continue + env.Install( installDir + '/' + installSetup.clientTestsDir , full ) #final alias env.Alias( "install" , installDir ) # aliases -if windows: - env.Alias( "mongoclient" , "mongoclient.lib" ) -else: - env.Alias( "mongoclient" , "libmongoclient.a" ) +env.Alias( "mongoclient" , GetOption( "sharedclient" ) and sharedClientLibName or clientLibName ) # ---- CONVENIENCE ---- @@ -1590,7 +1601,7 @@ if remotePrefix is None: if distName is None: - remotePrefix = getGitBranchString( "-" ) + "-latest" + remotePrefix = utils.getGitBranchString( "-" ) + "-latest" else: remotePrefix = "-" + distName @@ -1615,8 +1626,10 @@ name = name.lower() else: name = remoteName - - if platformDir: + + if isDriverBuild(): + name = "cxx-driver/" + name + elif platformDir: name = platform + "/" + name print( "uploading " + localName + " to http://s3.amazonaws.com/" + s.name + "/" + name ) @@ -1637,16 +1650,35 @@ s3push( distFile , "mongodb" ) env.Append( TARFLAGS=" -z " ) -if windows: - distFile = installDir + ".zip" - env.Zip( distFile , installDir ) -else: - distFile = installDir + ".tgz" - env.Tar( distFile , installDir ) -env.Alias( "dist" , distFile ) -env.Alias( "s3dist" , [ "install" , distFile ] , [ s3dist ] ) -env.AlwaysBuild( "s3dist" ) +if installDir[-1] != "/": + if windows: + distFile = installDir + ".zip" + env.Zip( distFile , installDir ) + else: + distFile = installDir + ".tgz" + env.Tar( distFile , installDir ) + + env.Alias( "dist" , distFile ) + env.Alias( "s3dist" , [ "install" , distFile ] , [ s3dist ] ) + env.AlwaysBuild( "s3dist" ) + + +# client dist +def build_and_test_client(env, target, source): + from subprocess import call + + if GetOption("extrapath") is not None: + scons_command = ["scons", "--extrapath=" + 
GetOption("extrapath")] + else: + scons_command = ["scons"] + + call(scons_command + ["libmongoclient.a", "clientTests"], cwd=installDir) + + return bool(call(["python", "buildscripts/smoke.py", + "--test-path", installDir, "smokeClient"])) +env.Alias("clientBuild", [mongod, installDir], [build_and_test_client]) +env.AlwaysBuild("clientBuild") def clean_old_dist_builds(env, target, source): prefix = "mongodb-%s-%s" % (platform, processor) diff -Nru mongodb-1.4.4/scripting/engine.cpp mongodb-1.6.3/scripting/engine.cpp --- mongodb-1.4.4/scripting/engine.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "engine.h" #include "../util/file.h" #include "../client/dbclient.h" @@ -73,6 +73,9 @@ // TODO: make signed builder.appendDate( fieldName , Date_t((unsigned long long)getNumber( scopeName )) ); break; + case Code: + builder.appendCode( fieldName , getString( scopeName ).c_str() ); + break; default: stringstream temp; temp << "can't append type from:"; @@ -93,7 +96,7 @@ path p( filename ); if ( ! exists( p ) ){ - cout << "file [" << filename << "] doesn't exist" << endl; + log() << "file [" << filename << "] doesn't exist" << endl; if ( assertOnError ) assert( 0 ); return false; @@ -113,7 +116,7 @@ } if (empty){ - cout << "directory [" << filename << "] doesn't have any *.js files" << endl; + log() << "directory [" << filename << "] doesn't have any *.js files" << endl; if ( assertOnError ) assert( 0 ); return false; @@ -167,6 +170,7 @@ static DBClientBase * db = createDirectClient(); auto_ptr c = db->query( coll , Query() ); + assert( c.get() ); set thisTime; @@ -228,7 +232,7 @@ class ScopeCache { public: - ScopeCache(){ + ScopeCache() : _mutex("ScopeCache") { _magic = 17; } @@ -301,7 +305,9 @@ _real = 0; } else { - log() << "warning: scopeCache is empty!" << endl; + // this means that the Scope was killed from a different thread + // for example a cursor got timed out that has a $where clause + log(3) << "warning: scopeCache is empty!" << endl; delete _real; _real = 0; } @@ -421,5 +427,15 @@ void ( *ScriptEngine::_connectCallback )( DBClientWithCommands & ) = 0; ScriptEngine * globalScriptEngine; + + bool hasJSReturn( const string& code ){ + size_t x = code.find( "return" ); + if ( x == string::npos ) + return false; + + return + ( x == 0 || ! isalpha( code[x-1] ) ) && + ! 
isalpha( code[x+6] ); + } } - \ No newline at end of file + diff -Nru mongodb-1.4.4/scripting/engine.h mongodb-1.6.3/scripting/engine.h --- mongodb-1.4.4/scripting/engine.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../db/jsobj.h" extern const char * jsconcatcode; // TODO: change name to mongoJSCode @@ -162,5 +162,7 @@ static void ( *_connectCallback )( DBClientWithCommands & ); }; + bool hasJSReturn( const string& s ); + extern ScriptEngine * globalScriptEngine; } diff -Nru mongodb-1.4.4/scripting/engine_java.cpp mongodb-1.6.3/scripting/engine_java.cpp --- mongodb-1.4.4/scripting/engine_java.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine_java.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ */ -#include "stdafx.h" +#include "pch.h" #include "engine_java.h" #include #include diff -Nru mongodb-1.4.4/scripting/engine_java.h mongodb-1.6.3/scripting/engine_java.h --- mongodb-1.4.4/scripting/engine_java.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine_java.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,10 +19,9 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include -#include #include #include diff -Nru mongodb-1.4.4/scripting/engine_spidermonkey.cpp mongodb-1.6.3/scripting/engine_spidermonkey.cpp --- mongodb-1.4.4/scripting/engine_spidermonkey.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine_spidermonkey.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,15 +15,14 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "engine_spidermonkey.h" - #include "../client/dbclient.h" #ifndef _WIN32 #include #undef assert -#define assert xassert +#define assert MONGO_assert #endif #define smuassert( cx , msg , val ) \ @@ -38,6 +37,12 @@ } namespace mongo { + + class InvalidUTF8Exception : public UserException { + public: + InvalidUTF8Exception() : UserException( 9006 , "invalid utf8" ){ + } + }; string trim( string s ){ while ( s.size() && isspace( s[0] ) ) @@ -128,6 +133,54 @@ return new BSONFieldIterator( this ); } + class TraverseStack { + public: + TraverseStack(){ + _o = 0; + _parent = 0; + } + + TraverseStack( JSObject * o , const TraverseStack * parent ){ + _o = o; + _parent = parent; + } + + TraverseStack dive( JSObject * o ) const { + if ( o ){ + uassert( 13076 , (string)"recursive toObject" , ! has( o ) ); + } + return TraverseStack( o , this ); + } + + int depth() const { + int d = 0; + const TraverseStack * s = _parent; + while ( s ){ + s = s->_parent; + d++; + } + return d; + } + + bool isTop() const { + return _parent == 0; + } + + bool has( JSObject * o ) const { + if ( ! o ) + return false; + const TraverseStack * s = this; + while ( s ){ + if ( s->_o == o ) + return true; + s = s->_parent; + } + return false; + } + + JSObject * _o; + const TraverseStack * _parent; + }; class Convertor : boost::noncopyable { public: @@ -171,7 +224,7 @@ ( (boost::uint64_t)(boost::uint32_t)getNumber( o , "top" ) << 32 ) + ( boost::uint32_t)( getNumber( o , "bottom" ) ); } else { - val = (boost::uint64_t) getNumber( o, "floatApprox" ); + val = (boost::uint64_t)(boost::int64_t) getNumber( o, "floatApprox" ); } return val; } @@ -198,7 +251,7 @@ return oid; } - BSONObj toObject( JSObject * o , int depth = 0){ + BSONObj toObject( JSObject * o , const TraverseStack& stack=TraverseStack() ){ if ( ! 
o ) return BSONObj(); @@ -222,10 +275,10 @@ if ( ! appendSpecialDBObject( this , b , "value" , OBJECT_TO_JSVAL( o ) , o ) ){ - if ( depth == 0 ){ + if ( stack.isTop() ){ jsval theid = getProperty( o , "_id" ); if ( ! JSVAL_IS_VOID( theid ) ){ - append( b , "_id" , theid , EOO , depth + 1 ); + append( b , "_id" , theid , EOO , stack.dive( o ) ); } } @@ -237,10 +290,10 @@ jsval nameval; assert( JS_IdToValue( _context ,id , &nameval ) ); string name = toString( nameval ); - if ( depth == 0 && name == "_id" ) + if ( stack.isTop() && name == "_id" ) continue; - append( b , name , getProperty( o , name.c_str() ) , orig[name].type() , depth + 1 ); + append( b , name , getProperty( o , name.c_str() ) , orig[name].type() , stack.dive( o ) ); } JS_DestroyIdArray( _context , properties ); @@ -271,39 +324,39 @@ assert( s[0] == '/' ); s = s.substr(1); string::size_type end = s.rfind( '/' ); - b.appendRegex( name.c_str() , s.substr( 0 , end ).c_str() , s.substr( end + 1 ).c_str() ); + b.appendRegex( name , s.substr( 0 , end ).c_str() , s.substr( end + 1 ).c_str() ); } - void append( BSONObjBuilder& b , string name , jsval val , BSONType oldType = EOO , int depth=0 ){ + void append( BSONObjBuilder& b , string name , jsval val , BSONType oldType = EOO , const TraverseStack& stack=TraverseStack() ){ //cout << "name: " << name << "\t" << typeString( val ) << " oldType: " << oldType << endl; switch ( JS_TypeOfValue( _context , val ) ){ - case JSTYPE_VOID: b.appendUndefined( name.c_str() ); break; - case JSTYPE_NULL: b.appendNull( name.c_str() ); break; + case JSTYPE_VOID: b.appendUndefined( name ); break; + case JSTYPE_NULL: b.appendNull( name ); break; case JSTYPE_NUMBER: { double d = toNumber( val ); if ( oldType == NumberInt && ((int)d) == d ) - b.append( name.c_str() , (int)d ); + b.append( name , (int)d ); else - b.append( name.c_str() , d ); + b.append( name , d ); break; } - case JSTYPE_STRING: b.append( name.c_str() , toString( val ) ); break; - case JSTYPE_BOOLEAN: b.appendBool( name.c_str() , toBoolean( val ) ); break; + case JSTYPE_STRING: b.append( name , toString( val ) ); break; + case JSTYPE_BOOLEAN: b.appendBool( name , toBoolean( val ) ); break; case JSTYPE_OBJECT: { JSObject * o = JSVAL_TO_OBJECT( val ); if ( ! o || o == JSVAL_NULL ){ - b.appendNull( name.c_str() ); + b.appendNull( name ); } else if ( ! appendSpecialDBObject( this , b , name , val , o ) ){ - BSONObj sub = toObject( o , depth ); + BSONObj sub = toObject( o , stack ); if ( JS_IsArrayObject( _context , o ) ){ - b.appendArray( name.c_str() , sub ); + b.appendArray( name , sub ); } else { - b.append( name.c_str() , sub ); + b.append( name , sub ); } } break; @@ -315,7 +368,7 @@ appendRegex( b , name , s ); } else { - b.appendCode( name.c_str() , getFunctionCode( val ).c_str() ); + b.appendCode( name , getFunctionCode( val ).c_str() ); } break; } @@ -334,7 +387,7 @@ } bool isSimpleStatement( const string& code ){ - if ( code.find( "return" ) != string::npos ) + if ( hasJSReturn( code ) ) return false; if ( code.find( ";" ) != string::npos && @@ -416,7 +469,7 @@ JSFunction * func = JS_CompileFunction( _context , assoc , fname.str().c_str() , params.size() , paramArray.get() , code.c_str() , strlen( code.c_str() ) , "nofile_b" , 0 ); if ( ! func ){ - cout << "compile failed for: " << raw << endl; + log() << "compile failed for: " << raw << endl; return 0; } gcName = "cf normal"; @@ -449,11 +502,11 @@ free( dst ); if ( ! res ){ - cout << "decode failed. 
probably invalid utf-8 string [" << c << "]" << endl; + tlog() << "decode failed. probably invalid utf-8 string [" << c << "]" << endl; jsval v; if ( JS_GetPendingException( _context , &v ) ) - cout << "\t why: " << toString( v ) << endl; - throw UserException( 9006 , "invalid utf8" ); + tlog() << "\t why: " << toString( v ) << endl; + throw InvalidUTF8Exception(); } assert( s ); @@ -479,6 +532,24 @@ return OBJECT_TO_JSVAL( o ); } + void makeLongObj( long long n, JSObject * o ) { + boost::uint64_t val = (boost::uint64_t)n; + CHECKNEWOBJECT(o,_context,"NumberLong1"); + setProperty( o , "floatApprox" , toval( (double)(boost::int64_t)( val ) ) ); + if ( (boost::int64_t)val != (boost::int64_t)(double)(boost::int64_t)( val ) ) { + // using 2 doubles here instead of a single double because certain double + // bit patterns represent undefined values and sm might trash them + setProperty( o , "top" , toval( (double)(boost::uint32_t)( val >> 32 ) ) ); + setProperty( o , "bottom" , toval( (double)(boost::uint32_t)( val & 0x00000000ffffffff ) ) ); + } + } + + jsval toval( long long n ) { + JSObject * o = JS_NewObject( _context , &numberlong_class , 0 , 0 ); + makeLongObj( n, o ); + return OBJECT_TO_JSVAL( o ); + } + jsval toval( const BSONElement& e ){ switch( e.type() ){ @@ -549,7 +620,9 @@ } case Code:{ JSFunction * func = compileFunction( e.valuestr() ); - return OBJECT_TO_JSVAL( JS_GetFunctionObject( func ) ); + if ( func ) + return OBJECT_TO_JSVAL( JS_GetFunctionObject( func ) ); + return JSVAL_NULL; } case CodeWScope:{ JSFunction * func = compileFunction( e.codeWScopeCode() ); @@ -578,17 +651,7 @@ return OBJECT_TO_JSVAL( o ); } case NumberLong: { - boost::uint64_t val = (boost::uint64_t)e.numberLong(); - JSObject * o = JS_NewObject( _context , &numberlong_class , 0 , 0 ); - CHECKNEWOBJECT(o,_context,"NumberLong1"); - setProperty( o , "floatApprox" , toval( (double)(boost::int64_t)( val ) ) ); - if ( (boost::int64_t)val != (boost::int64_t)(double)(boost::int64_t)( val ) ) { - // using 2 doubles here instead of a single double because certain double - // bit patterns represent undefined values and sm might trash them - setProperty( o , "top" , toval( (double)(boost::uint32_t)( val >> 32 ) ) ); - setProperty( o , "bottom" , toval( (double)(boost::uint32_t)( val & 0x00000000ffffffff ) ) ); - } - return OBJECT_TO_JSVAL( o ); + return toval( e.numberLong() ); } case DBRef: { JSObject * o = JS_NewObject( _context , &dbpointer_class , 0 , 0 ); @@ -607,15 +670,16 @@ CHECKNEWOBJECT(o,_context,"Bindata_BinData1"); int len; const char * data = e.binData( len ); - assert( JS_SetPrivate( _context , o , new BinDataHolder( data ) ) ); + assert( data ); + assert( JS_SetPrivate( _context , o , new BinDataHolder( data , len ) ) ); - setProperty( o , "len" , toval( len ) ); - setProperty( o , "type" , toval( (int)e.binDataType() ) ); + setProperty( o , "len" , toval( (double)len ) ); + setProperty( o , "type" , toval( (double)e.binDataType() ) ); return OBJECT_TO_JSVAL( o ); } } - cout << "toval: unknown type: " << e.type() << endl; + log() << "toval: unknown type: " << (int) e.type() << endl; uassert( 10218 , "not done: toval" , 0 ); return 0; } @@ -824,13 +888,15 @@ // --- global helpers --- JSBool native_print( JSContext * cx , JSObject * obj , uintN argc, jsval *argv, jsval *rval ){ + stringstream ss; Convertor c( cx ); for ( uintN i=0; i<argc; i++ ){ if ( i > 0 ) - cout << " "; + ss << " "; - cout << c.toString( argv[i] ); + ss << c.toString( argv[i] ); } - cout << endl; + ss << "\n"; + Logstream::logLockless( ss.str() ); return
JS_TRUE; } @@ -894,14 +960,25 @@ return JS_FALSE; } - BSONHolder * o = GETHOLDER( cx , JSVAL_TO_OBJECT( argv[ 0 ] ) ); + JSObject * o = JSVAL_TO_OBJECT( argv[0] ); + + Convertor c(cx); double size = 0; - if ( o ){ - size = o->_obj.objsize(); + + if ( JS_InstanceOf( cx , o , &bson_ro_class , 0 ) || + JS_InstanceOf( cx , o , &bson_class , 0 ) ){ + BSONHolder * h = GETHOLDER( cx , o ); + if ( h ){ + size = h->_obj.objsize(); + } } - Convertor c(cx); + else { + BSONObj temp = c.toObject( o ); + size = temp.objsize(); + } + *rval = c.toval( size ); - return JS_TRUE; + return JS_TRUE; } JSFunctionSpec objectHelpers[] = { @@ -934,7 +1011,15 @@ return JS_TRUE; } - jsval val = c.toval( e ); + jsval val; + try { + val = c.toval( e ); + } + catch ( InvalidUTF8Exception& ) { + JS_LeaveLocalRootScope( cx ); + JS_ReportError( cx , "invalid utf8" ); + return JS_FALSE; + } assert( ! holder->_inResolve ); holder->_inResolve = true; @@ -1115,20 +1200,22 @@ } void localConnect( const char * dbName ){ - smlock; - uassert( 10225 , "already setup for external db" , ! _externalSetup ); - if ( _localConnect ){ - uassert( 10226 , "connected to different db" , _localDBName == dbName ); - return; + { + smlock; + uassert( 10225 , "already setup for external db" , ! _externalSetup ); + if ( _localConnect ){ + uassert( 10226 , "connected to different db" , _localDBName == dbName ); + return; + } + + initMongoJS( this , _context , _global , true ); + + exec( "_mongo = new Mongo();" ); + exec( ((string)"db = _mongo.getDB( \"" + dbName + "\" ); ").c_str() ); + + _localConnect = true; + _localDBName = dbName; } - - initMongoJS( this , _context , _global , true ); - - exec( "_mongo = new Mongo();" ); - exec( ((string)"db = _mongo.getDB( \"" + dbName + "\" ); ").c_str() ); - - _localConnect = true; - _localDBName = dbName; loadStored(); } @@ -1309,6 +1396,15 @@ JSBool worked = JS_EvaluateScript( _context , _global , code.c_str() , strlen( code.c_str() ) , name.c_str() , 0 , &ret ); uninstallCheckTimeout( timeoutMs ); + if ( ! worked && _error.size() == 0 ){ + jsval v; + if ( JS_GetPendingException( _context , &v ) ){ + _error = _convertor->toString( v ); + if ( reportError ) + cout << _error << endl; + } + } + if ( assertOnError ) uassert( 10228 , name + " exec failed" , worked ); @@ -1387,7 +1483,6 @@ code << field << "_" << " = { x : " << field << "_ }; "; code << field << " = function(){ return nativeHelper.apply( " << field << "_ , arguments ); }"; exec( code.str().c_str() ); - } virtual void gc(){ @@ -1424,15 +1519,20 @@ }; + /* used to make the logging not overly chatty in the mongo shell. 
*/ + extern bool isShell; + void errorReporter( JSContext *cx, const char *message, JSErrorReport *report ){ stringstream ss; - ss << "JS Error: " << message; + if( !isShell ) + ss << "JS Error: "; + ss << message; if ( report && report->filename ){ ss << " " << report->filename << ":" << report->lineno; } - log() << ss.str() << endl; + tlog() << ss.str() << endl; if ( currentScope.get() ){ currentScope->gotError( ss.str() ); @@ -1446,10 +1546,10 @@ for ( uintN i=0; iexecFile( filename , false , true , false ) ){ - JS_ReportError( cx , ((string)"error loading file: " + filename ).c_str() ); + JS_ReportError( cx , ((string)"error loading js file: " + filename ).c_str() ); return JS_FALSE; } } diff -Nru mongodb-1.4.4/scripting/engine_spidermonkey.h mongodb-1.6.3/scripting/engine_spidermonkey.h --- mongodb-1.4.4/scripting/engine_spidermonkey.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine_spidermonkey.h 2010-09-24 10:02:42.000000000 -0700 @@ -37,7 +37,7 @@ #include "jstypes.h" #undef JS_PUBLIC_API #undef JS_PUBLIC_DATA -#define JS_PUBLIC_API(t) t +#define JS_PUBLIC_API(t) t __cdecl #define JS_PUBLIC_DATA(t) t #endif diff -Nru mongodb-1.4.4/scripting/engine_v8.cpp mongodb-1.6.3/scripting/engine_v8.cpp --- mongodb-1.4.4/scripting/engine_v8.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/engine_v8.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -57,9 +57,12 @@ _global->Set(v8::String::New("load"), v8::FunctionTemplate::New(loadCallback, v8::External::New(this))->GetFunction() ); - - _wrapper = Persistent< v8::Function >::New( getObjectWrapperTemplate()->GetFunction() ); + _wrapper = Persistent< v8::Function >::New( getObjectWrapperTemplate()->GetFunction() ); + + _global->Set(v8::String::New("gc"), v8::FunctionTemplate::New(GCV8)->GetFunction() ); + + installDBTypes( _global ); } @@ -232,7 +235,7 @@ string code = raw; if ( code.find( "function" ) == string::npos ){ if ( code.find( "\n" ) == string::npos && - code.find( "return" ) == string::npos && + ! hasJSReturn( code ) && ( code.find( ";" ) == string::npos || code.find( ";" ) == code.size() - 1 ) ){ code = "return " + code; } @@ -383,6 +386,7 @@ } void V8Scope::gc() { + cout << "in gc" << endl; Locker l; while( V8::IdleNotification() ); } diff -Nru mongodb-1.4.4/scripting/sm_db.cpp mongodb-1.6.3/scripting/sm_db.cpp --- mongodb-1.4.4/scripting/sm_db.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/sm_db.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -19,6 +19,15 @@ #include "../client/syncclusterconnection.h" #include "../util/base64.h" +#include "../util/text.h" +#include "../util/hex.h" + +#if( BOOST_VERSION >= 104200 ) +//#include +#define HAVE_UUID 1 +#else +; +#endif namespace mongo { @@ -86,25 +95,49 @@ JSBool internal_cursor_hasNext(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ DBClientCursor *cursor = getCursor( cx, obj ); - *rval = cursor->more() ? JSVAL_TRUE : JSVAL_FALSE; + try { + *rval = cursor->more() ? JSVAL_TRUE : JSVAL_FALSE; + } + catch ( std::exception& e ){ + JS_ReportError( cx , e.what() ); + return JS_FALSE; + } + return JS_TRUE; + } + + JSBool internal_cursor_objsLeftInBatch(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + DBClientCursor *cursor = getCursor( cx, obj ); + Convertor c(cx); + *rval = c.toval((double) cursor->objsLeftInBatch() ); return JS_TRUE; } JSBool internal_cursor_next(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ DBClientCursor *cursor = getCursor( cx, obj ); - if ( ! 
cursor->more() ){ - JS_ReportError( cx , "cursor at the end" ); + + BSONObj n; + + try { + if ( ! cursor->more() ){ + JS_ReportError( cx , "cursor at the end" ); + return JS_FALSE; + } + + n = cursor->next(); + } + catch ( std::exception& e ){ + JS_ReportError( cx , e.what() ); return JS_FALSE; } - Convertor c(cx); - BSONObj n = cursor->next(); + Convertor c(cx); *rval = c.toval( &n ); return JS_TRUE; } JSFunctionSpec internal_cursor_functions[] = { { "hasNext" , internal_cursor_hasNext , 0 , JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { "objsLeftInBatch" , internal_cursor_objsLeftInBatch , 0 , JSPROP_READONLY | JSPROP_PERMANENT, 0 } , { "next" , internal_cursor_next , 0 , JSPROP_READONLY | JSPROP_PERMANENT, 0 } , { 0 } }; @@ -139,43 +172,28 @@ JSBool mongo_external_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ Convertor c( cx ); - uassert( 10238 , "0 or 1 args to Mongo" , argc <= 1 ); + smuassert( cx , "0 or 1 args to Mongo" , argc <= 1 ); string host = "127.0.0.1"; if ( argc > 0 ) host = c.toString( argv[0] ); - int numCommas = DBClientBase::countCommas( host ); - - shared_ptr< DBClientWithCommands > conn; - string errmsg; - if ( numCommas == 0 ){ - DBClientConnection * c = new DBClientConnection( true ); - conn.reset( c ); - if ( ! c->connect( host , errmsg ) ){ - JS_ReportError( cx , ((string)"couldn't connect: " + errmsg).c_str() ); - return JS_FALSE; - } - ScriptEngine::runConnectCallback( *c ); - } - else if ( numCommas == 1 ){ // paired - DBClientPaired * c = new DBClientPaired(); - conn.reset( c ); - if ( ! c->connect( host ) ){ - JS_ReportError( cx , "couldn't connect to pair" ); - return JS_FALSE; - } - } - else if ( numCommas == 2 ){ - conn.reset( new SyncClusterConnection( host ) ); + + ConnectionString cs = ConnectionString::parse( host , errmsg ); + if ( ! cs.isValid() ){ + JS_ReportError( cx , errmsg.c_str() ); + return JS_FALSE; } - else { - JS_ReportError( cx , "1 (paired) or 2(quorum) commas are allowed" ); + + shared_ptr< DBClientWithCommands > conn( cs.connect( errmsg ) ); + if ( ! conn ){ + JS_ReportError( cx , errmsg.c_str() ); return JS_FALSE; } - + ScriptEngine::runConnectCallback( *conn ); + assert( JS_SetPrivate( cx , obj , (void*)( new shared_ptr< DBClientWithCommands >( conn ) ) ) ); jsval host_val = c.toval( host.c_str() ); assert( JS_SetProperty( cx , obj , "host" , &host_val ) ); @@ -205,9 +223,9 @@ }; JSBool mongo_find(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ - uassert( 10240 , "mongo_find neesd 6 args" , argc == 6 ); + smuassert( cx , "mongo_find needs 6 args" , argc == 6 ); shared_ptr< DBClientWithCommands > * connHolder = (shared_ptr< DBClientWithCommands >*)JS_GetPrivate( cx , obj ); - uassert( 10241 , "no connection!" , connHolder && connHolder->get() ); + smuassert( cx , "no connection!" 
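objsLeftInBatch(), added to the internal cursor above (and to the V8 engine later in this patch), reports how many documents are still buffered client side, i.e. how many next() calls will succeed without another getmore round trip. A sketch, assuming the shell-level cursor forwards to it the way it forwards hasNext()/next():

var c = db.foo.find();
c.hasNext();                   // may pull the first batch from the server
print( c.objsLeftInBatch() );  // documents remaining in the local batch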
, connHolder && connHolder->get() ); DBClientWithCommands *conn = connHolder->get(); Convertor c( cx ); @@ -308,7 +326,7 @@ } JSBool mongo_remove(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ - smuassert( cx , "mongo_remove needs 2 arguments" , argc == 2 ); + smuassert( cx , "mongo_remove needs 2 or 3 arguments" , argc == 2 || argc == 3 ); smuassert( cx , "2nd param to insert has to be an object" , JSVAL_IS_OBJECT( argv[1] ) ); Convertor c( cx ); @@ -322,9 +340,12 @@ string ns = c.toString( argv[0] ); BSONObj o = c.toObject( argv[1] ); - + bool justOne = false; + if ( argc > 2 ) + justOne = c.toBoolean( argv[2] ); + try { - conn->remove( ns , o ); + conn->remove( ns , o , justOne ); return JS_TRUE; } catch ( ... ){ @@ -342,7 +363,6 @@ { 0 } }; - // ------------- db_collection ------------- JSBool db_collection_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ @@ -516,7 +536,7 @@ JSBool object_id_tostring(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ Convertor c(cx); - return *rval = c.getProperty( obj , "str" ); + return (JSBool) (*rval = c.getProperty( obj , "str" )); } JSFunctionSpec object_id_functions[] = { @@ -524,7 +544,6 @@ { 0 } }; - // dbpointer JSBool dbpointer_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ @@ -580,8 +599,78 @@ JSClass dbref_class = bson_class; // name will be fixed later - // BinData + // UUID ************************** + + JSBool uuid_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ + Convertor c( cx ); + + if( argc == 0 ) { +#if defined(HAVE_UUID) + //uuids::uuid +#else +#endif + JS_ReportError( cx , "UUID needs 1 argument -- UUID(hexstr)" ); + return JS_FALSE; + } + else if ( argc == 1 ) { + + string encoded = c.toString( argv[ 0 ] ); + if( encoded.size() != 32 ) { + JS_ReportError( cx, "expect 32 char hex string to UUID()" ); + return JS_FALSE; + } + + char buf[16]; + for( int i = 0; i < 16; i++ ) { + buf[i] = fromHex(encoded.c_str() + i * 2); + } + + assert( JS_SetPrivate( cx, obj, new BinDataHolder( buf, 16 ) ) ); + c.setProperty( obj, "len", c.toval( (double)16 ) ); + c.setProperty( obj, "type", c.toval( (double)3 ) ); + + return JS_TRUE; + } + else { + JS_ReportError( cx , "UUID needs 1 argument -- UUID(hexstr)" ); + return JS_FALSE; + } + } + + JSBool uuid_tostring(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + Convertor c(cx); + void *holder = JS_GetPrivate( cx, obj ); + assert( holder ); + const char *data = ( ( BinDataHolder* )( holder ) )->c_; + stringstream ss; + ss << "UUID(\"" << toHex(data, 16); + ss << "\")"; + string ret = ss.str(); + return *rval = c.toval( ret.c_str() ); + } + + void uuid_finalize( JSContext * cx , JSObject * obj ){ + Convertor c(cx); + void *holder = JS_GetPrivate( cx, obj ); + if ( holder ){ + delete ( BinDataHolder* )holder; + assert( JS_SetPrivate( cx , obj , 0 ) ); + } + } + + JSClass uuid_class = { + "UUID" , JSCLASS_HAS_PRIVATE , + JS_PropertyStub, JS_PropertyStub, JS_PropertyStub, JS_PropertyStub, + JS_EnumerateStub, JS_ResolveStub , JS_ConvertStub, uuid_finalize, + JSCLASS_NO_OPTIONAL_MEMBERS + }; + JSFunctionSpec uuid_functions[] = { + { "toString" , uuid_tostring , 0 , JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { 0 } + }; + + // BinData ************************** JSBool bindata_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ Convertor c( cx ); @@ -589,17 +678,28 @@ if ( argc == 2 ){ int type = 
(int)c.toNumber( argv[ 0 ] ); + if( type < 0 || type > 255 ) { + JS_ReportError( cx , "invalid BinData subtype -- range is 0..255 see bsonspec.org" ); + return JS_FALSE; + } string encoded = c.toString( argv[ 1 ] ); - string decoded = base64::decode( encoded ); + string decoded; + try { + decoded = base64::decode( encoded ); + } + catch(...) { + JS_ReportError(cx, "BinData could not decode base64 parameter"); + return JS_FALSE; + } assert( JS_SetPrivate( cx, obj, new BinDataHolder( decoded.data(), decoded.length() ) ) ); - c.setProperty( obj, "len", c.toval( decoded.length() ) ); - c.setProperty( obj, "type", c.toval( type ) ); + c.setProperty( obj, "len", c.toval( (double)decoded.length() ) ); + c.setProperty( obj, "type", c.toval( (double)type ) ); return JS_TRUE; } else { - JS_ReportError( cx , "BinData needs 2 arguments" ); + JS_ReportError( cx , "BinData needs 2 arguments -- BinData(subtype,data)" ); return JS_FALSE; } } @@ -612,13 +712,53 @@ assert( holder ); const char *data = ( ( BinDataHolder* )( holder ) )->c_; stringstream ss; - ss << "BinData( type: " << type << ", base64: \""; + ss << "BinData(" << type << ",\""; base64::encode( ss, (const char *)data, len ); - ss << "\" )"; + ss << "\")"; string ret = ss.str(); return *rval = c.toval( ret.c_str() ); } + JSBool bindataBase64(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + Convertor c(cx); + int len = (int)c.getNumber( obj, "len" ); + void *holder = JS_GetPrivate( cx, obj ); + assert( holder ); + const char *data = ( ( BinDataHolder* )( holder ) )->c_; + stringstream ss; + base64::encode( ss, (const char *)data, len ); + string ret = ss.str(); + return *rval = c.toval( ret.c_str() ); + } + + JSBool bindataAsHex(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + Convertor c(cx); + int len = (int)c.getNumber( obj, "len" ); + void *holder = JS_GetPrivate( cx, obj ); + assert( holder ); + const char *data = ( ( BinDataHolder* )( holder ) )->c_; + stringstream ss; + ss << hex; + for( int i = 0; i < len; i++ ) { + unsigned v = (unsigned char) data[i]; + ss << v; + } + string ret = ss.str(); + return *rval = c.toval( ret.c_str() ); + } + + JSBool bindataLength(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + Convertor c(cx); + int len = (int)c.getNumber( obj, "len" ); + return *rval = c.toval((double) len); + } + + JSBool bindataSubtype(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ + Convertor c(cx); + int t = (int)c.getNumber( obj, "type" ); + return *rval = c.toval((double) t); + } + void bindata_finalize( JSContext * cx , JSObject * obj ){ Convertor c(cx); void *holder = JS_GetPrivate( cx, obj ); @@ -637,6 +777,10 @@ JSFunctionSpec bindata_functions[] = { { "toString" , bindata_tostring , 0 , JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { "hex", bindataAsHex, 0, JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { "base64", bindataBase64, 0, JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { "length", bindataLength, 0, JSPROP_READONLY | JSPROP_PERMANENT, 0 } , + { "subtype", bindataSubtype, 0, JSPROP_READONLY | JSPROP_PERMANENT, 0 } , { 0 } }; @@ -699,6 +843,31 @@ JSCLASS_NO_OPTIONAL_MEMBERS }; + JSBool numberlong_constructor( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ + smuassert( cx , "NumberLong needs 0 or 1 args" , argc == 0 || argc == 1 ); + + Convertor c( cx ); + if ( argc == 0 ) { + c.setProperty( obj, "floatApprox", c.toval( 0.0 ) ); + } else if ( JSVAL_IS_NUMBER( argv[ 0 ] ) ) { + c.setProperty( obj, "floatApprox", argv[ 0 
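The UUID and BinData constructors above validate their arguments: UUID(hexstr) requires exactly 32 hex characters and stores them as 16 bytes of BSON subtype 3, while BinData(subtype,data) rejects subtypes outside 0..255 and base64 that fails to decode. Shell sketch:

var u = UUID( "0123456789abcdef0123456789abcdef" );  // 32 hex chars -> 16 bytes, subtype 3
print( u );                                          // UUID("0123456789abcdef0123456789abcdef")
var b = new BinData( 0 , "SGVsbG8=" );               // subtype 0, base64 for "Hello"
print( b );                                          // BinData(0,"SGVsbG8=")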
] ); + } else { + string num = c.toString( argv[ 0 ] ); + //PRINT(num); + const char *numStr = num.c_str(); + long long n; + try { + n = parseLL( numStr ); + //PRINT(n); + } catch ( const AssertionException & ) { + smuassert( cx , "could not convert string to long long" , false ); + } + c.makeLongObj( n, obj ); + } + + return JS_TRUE; + } + JSBool numberlong_valueof(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ Convertor c(cx); return *rval = c.toval( double( c.toNumberLongUnsafe( obj ) ) ); @@ -711,7 +880,14 @@ JSBool numberlong_tostring(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval){ Convertor c(cx); stringstream ss; - ss << c.toNumberLongUnsafe( obj ); + long long val = c.toNumberLongUnsafe( obj ); + const long long limit = 2LL << 30; + + if ( val <= -limit || limit <= val ) + ss << "NumberLong(\"" << val << "\")"; + else + ss << "NumberLong(" << val << ")"; + string ret = ss.str(); return *rval = c.toval( ret.c_str() ); } @@ -819,9 +995,10 @@ assert( JS_InitClass( cx , global , 0 , &dbquery_class , dbquery_constructor , 0 , 0 , 0 , 0 , 0 ) ); assert( JS_InitClass( cx , global , 0 , &dbpointer_class , dbpointer_constructor , 0 , 0 , dbpointer_functions , 0 , 0 ) ); assert( JS_InitClass( cx , global , 0 , &bindata_class , bindata_constructor , 0 , 0 , bindata_functions , 0 , 0 ) ); + assert( JS_InitClass( cx , global , 0 , &uuid_class , uuid_constructor , 0 , 0 , uuid_functions , 0 , 0 ) ); assert( JS_InitClass( cx , global , 0 , &timestamp_class , 0 , 0 , 0 , 0 , 0 , 0 ) ); - assert( JS_InitClass( cx , global , 0 , &numberlong_class , 0 , 0 , 0 , numberlong_functions , 0 , 0 ) ); + assert( JS_InitClass( cx , global , 0 , &numberlong_class , numberlong_constructor , 0 , 0 , numberlong_functions , 0 , 0 ) ); assert( JS_InitClass( cx , global , 0 , &minkey_class , 0 , 0 , 0 , 0 , 0 , 0 ) ); assert( JS_InitClass( cx , global , 0 , &maxkey_class , 0 , 0 , 0 , 0 , 0 , 0 ) ); @@ -842,39 +1019,39 @@ if ( JS_InstanceOf( c->_context , o , &object_id_class , 0 ) ){ OID oid; oid.init( c->getString( o , "str" ) ); - b.append( name.c_str() , oid ); + b.append( name , oid ); return true; } if ( JS_InstanceOf( c->_context , o , &minkey_class , 0 ) ){ - b.appendMinKey( name.c_str() ); + b.appendMinKey( name ); return true; } if ( JS_InstanceOf( c->_context , o , &maxkey_class , 0 ) ){ - b.appendMaxKey( name.c_str() ); + b.appendMaxKey( name ); return true; } if ( JS_InstanceOf( c->_context , o , &timestamp_class , 0 ) ){ - b.appendTimestamp( name.c_str() , (unsigned long long)c->getNumber( o , "t" ) , (unsigned int )c->getNumber( o , "i" ) ); + b.appendTimestamp( name , (unsigned long long)c->getNumber( o , "t" ) , (unsigned int )c->getNumber( o , "i" ) ); return true; } if ( JS_InstanceOf( c->_context , o , &numberlong_class , 0 ) ){ - b.append( name.c_str() , c->toNumberLongUnsafe( o ) ); + b.append( name , c->toNumberLongUnsafe( o ) ); return true; } if ( JS_InstanceOf( c->_context , o , &dbpointer_class , 0 ) ){ - b.appendDBRef( name.c_str() , c->getString( o , "ns" ).c_str() , c->toOID( c->getProperty( o , "id" ) ) ); + b.appendDBRef( name , c->getString( o , "ns" ).c_str() , c->toOID( c->getProperty( o , "id" ) ) ); return true; } if ( JS_InstanceOf( c->_context , o , &bindata_class , 0 ) ){ void *holder = JS_GetPrivate( c->_context , o ); const char *data = ( ( BinDataHolder * )( holder ) )->c_; - b.appendBinData( name.c_str() , + b.appendBinData( name , (int)(c->getNumber( o , "len" )) , (BinDataType)((char)(c->getNumber( o , "type" ) ) ) , data ); @@ -886,21 
+1063,21 @@ { jsdouble d = js_DateGetMsecSinceEpoch( c->_context , o ); if ( d ){ - b.appendDate( name.c_str() , Date_t(d) ); + b.appendDate( name , Date_t(d) ); return true; } } #elif defined( XULRUNNER ) if ( JS_InstanceOf( c->_context , o, globalSMEngine->_dateClass , 0 ) ){ jsdouble d = js_DateGetMsecSinceEpoch( c->_context , o ); - b.appendDate( name.c_str() , Date_t(d) ); + b.appendDate( name , Date_t(d) ); return true; } #else if ( JS_InstanceOf( c->_context , o, &js_DateClass , 0 ) ){ jsdouble d = js_DateGetMsecSinceEpoch( c->_context , o ); //TODO: make signed - b.appendDate( name.c_str() , Date_t((unsigned long long)d) ); + b.appendDate( name , Date_t((unsigned long long)d) ); return true; } #endif @@ -909,7 +1086,7 @@ if ( JS_InstanceOf( c->_context , o , &dbquery_class , 0 ) || JS_InstanceOf( c->_context , o , &mongo_class , 0 ) || JS_InstanceOf( c->_context , o , &db_collection_class , 0 ) ){ - b.append( name.c_str() , c->toString( val ) ); + b.append( name , c->toString( val ) ); return true; } diff -Nru mongodb-1.4.4/scripting/utils.cpp mongodb-1.6.3/scripting/utils.cpp --- mongodb-1.4.4/scripting/utils.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/utils.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,10 @@ */ -#include "stdafx.h" +#include "pch.h" #include "engine.h" #include "../util/md5.hpp" +#include "../util/version.h" namespace mongo { diff -Nru mongodb-1.4.4/scripting/v8_db.cpp mongodb-1.6.3/scripting/v8_db.cpp --- mongodb-1.4.4/scripting/v8_db.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/v8_db.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -20,6 +20,7 @@ #include "v8_db.h" #include "engine.h" #include "util/base64.h" +#include "util/text.h" #include "../client/syncclusterconnection.h" #include @@ -28,12 +29,11 @@ namespace mongo { -#define CONN_STRING (v8::String::New( "_conn" )) - #define DDD(x) v8::Handle getMongoFunctionTemplate( bool local ){ v8::Local mongo = FunctionTemplate::New( local ? 
mongoConsLocal : mongoConsExternal ); + mongo->InstanceTemplate()->SetInternalFieldCount( 1 ); v8::Local proto = mongo->PrototypeTemplate(); @@ -43,9 +43,13 @@ proto->Set( v8::String::New( "update" ) , FunctionTemplate::New( mongoUpdate ) ); Local ic = FunctionTemplate::New( internalCursorCons ); + ic->InstanceTemplate()->SetInternalFieldCount( 1 ); ic->PrototypeTemplate()->Set( v8::String::New("next") , FunctionTemplate::New( internalCursorNext ) ); ic->PrototypeTemplate()->Set( v8::String::New("hasNext") , FunctionTemplate::New( internalCursorHasNext ) ); + ic->PrototypeTemplate()->Set( v8::String::New("objsLeftInBatch") , FunctionTemplate::New( internalCursorObjsLeftInBatch ) ); proto->Set( v8::String::New( "internalCursor" ) , ic ); + + return mongo; } @@ -131,9 +135,10 @@ global->Get( v8::String::New( "Object" ) )->ToObject()->Set( v8::String::New("bsonsize") , FunctionTemplate::New( bsonsize )->GetFunction() ); } - void destroyConnection( Persistent object, void* parameter){ - // TODO - cout << "warning: destroyConnection not implemented" << endl; + void destroyConnection( Persistent self, void* parameter){ + delete static_cast(parameter); + self.Dispose(); + self.Clear(); } Handle mongoConsExternal(const Arguments& args){ @@ -148,47 +153,22 @@ strcpy( host , "127.0.0.1" ); } - DBClientWithCommands * conn = 0; - int commas = 0; - for ( int i=0; i<255; i++ ){ - if ( host[i] == ',' ) - commas++; - else if ( host[i] == 0 ) - break; - } - - if ( commas == 0 ){ - DBClientConnection * c = new DBClientConnection( true ); - string errmsg; - if ( ! c->connect( host , errmsg ) ){ - delete c; - string x = "couldn't connect: "; - x += errmsg; - return v8::ThrowException( v8::String::New( x.c_str() ) ); - } - conn = c; - } - else if ( commas == 1 ){ - DBClientPaired * c = new DBClientPaired(); - if ( ! c->connect( host ) ){ - delete c; - return v8::ThrowException( v8::String::New( "couldn't connect to pair" ) ); - } - conn = c; - } - else if ( commas == 2 ){ - conn = new SyncClusterConnection( host ); - } - else { - return v8::ThrowException( v8::String::New( "too many commas" ) ); - } - - Persistent self = Persistent::New( args.This() ); + string errmsg; + ConnectionString cs = ConnectionString::parse( host , errmsg ); + if ( ! cs.isValid() ) + return v8::ThrowException( v8::String::New( errmsg.c_str() ) ); + + + DBClientWithCommands * conn = cs.connect( errmsg ); + if ( ! conn ) + return v8::ThrowException( v8::String::New( errmsg.c_str() ) ); + + Persistent self = Persistent::New( args.Holder() ); self.MakeWeak( conn , destroyConnection ); ScriptEngine::runConnectCallback( *conn ); - // NOTE I don't believe the conn object will ever be freed. - args.This()->Set( CONN_STRING , External::New( conn ) ); + + args.This()->SetInternalField( 0 , External::New( conn ) ); args.This()->Set( v8::String::New( "slaveOk" ) , Boolean::New( false ) ); args.This()->Set( v8::String::New( "host" ) , v8::String::New( host ) ); @@ -206,7 +186,7 @@ self.MakeWeak( conn , destroyConnection ); // NOTE I don't believe the conn object will ever be freed. 
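With host parsing delegated to ConnectionString::parse in both engines, the shell's Mongo constructor accepts the same forms the C++ client does; the host names below are placeholders:

var m1 = new Mongo( "127.0.0.1" );                      // single server
var m2 = new Mongo( "left.example,right.example" );     // replica pair (one comma)
var m3 = new Mongo( "a.example,b.example,c.example" );  // three-member sync cluster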
- args.This()->Set( CONN_STRING , External::New( conn ) ); + args.This()->SetInternalField( 0 , External::New( conn ) ); args.This()->Set( v8::String::New( "slaveOk" ) , Boolean::New( false ) ); args.This()->Set( v8::String::New( "host" ) , v8::String::New( "EMBEDDED" ) ); @@ -223,7 +203,7 @@ #endif DBClientBase * getConnection( const Arguments& args ){ - Local c = External::Cast( *(args.This()->Get( CONN_STRING )) ); + Local c = External::Cast( *(args.This()->GetInternalField( 0 )) ); DBClientBase * conn = (DBClientBase*)(c->Value()); assert( conn ); return conn; @@ -231,6 +211,12 @@ // ---- real methods + void destroyCursor( Persistent self, void* parameter){ + delete static_cast(parameter); + self.Dispose(); + self.Clear(); + } + /** 0 - namespace 1 - query @@ -239,6 +225,8 @@ 4 - skip */ Handle mongoFind(const Arguments& args){ + HandleScope handle_scope; + jsassert( args.Length() == 6 , "find needs 6 args" ); jsassert( args[1]->IsObject() , "needs to be an object" ); DBClientBase * conn = getConnection( args ); @@ -268,11 +256,12 @@ } v8::Function * cons = (v8::Function*)( *( mongo->Get( v8::String::New( "internalCursor" ) ) ) ); assert( cons ); - Local c = cons->NewInstance(); - - // NOTE I don't believe the cursor object will ever be freed. - c->Set( v8::String::New( "cursor" ) , External::New( cursor.release() ) ); - return c; + + Persistent c = Persistent::New( cons->NewInstance() ); + c.MakeWeak( cursor.get() , destroyCursor ); + + c->SetInternalField( 0 , External::New( cursor.release() ) ); + return handle_scope.Close(c); } catch ( ... ){ return v8::ThrowException( v8::String::New( "socket error on query" ) ); @@ -308,7 +297,7 @@ } v8::Handle mongoRemove(const v8::Arguments& args){ - jsassert( args.Length() == 2 , "remove needs 2 args" ); + jsassert( args.Length() == 2 || args.Length() == 3 , "remove needs 2 args" ); jsassert( args[1]->IsObject() , "have to remove an object template" ); DBClientBase * conn = getConnection( args ); @@ -317,10 +306,15 @@ v8::Handle in = args[1]->ToObject(); BSONObj o = v8ToMongo( in ); + bool justOne = false; + if ( args.Length() > 2 ){ + justOne = args[2]->BooleanValue(); + } + DDD( "want to remove : " << o.jsonString() ); try { v8::Unlocker u; - conn->remove( ns , o ); + conn->remove( ns , o , justOne ); } catch ( ... ){ return v8::ThrowException( v8::String::New( "socket error on remove" ) ); @@ -362,7 +356,8 @@ // --- cursor --- mongo::DBClientCursor * getCursor( const Arguments& args ){ - Local c = External::Cast( *(args.This()->Get( v8::String::New( "cursor" ) ) ) ); + Local c = External::Cast( *(args.This()->GetInternalField( 0 ) ) ); + mongo::DBClientCursor * cursor = (mongo::DBClientCursor*)(c->Value()); return cursor; } @@ -395,6 +390,18 @@ return Boolean::New( ret ); } + v8::Handle internalCursorObjsLeftInBatch(const v8::Arguments& args){ + mongo::DBClientCursor * cursor = getCursor( args ); + if ( ! 
cursor ) + return v8::Number::New( (double) 0 ); + int ret; + { + v8::Unlocker u; + ret = cursor->objsLeftInBatch(); + } + return v8::Number::New( (double) ret ); + } + // --- DB ---- @@ -623,17 +630,17 @@ v8::String::Utf8Value data( it->Get( v8::String::New( "data" ) ) ); stringstream ss; - ss << "BinData( type: " << type << ", base64: \""; + ss << "BinData(" << type << ",\""; base64::encode( ss, *data, len ); - ss << "\" )"; + ss << "\")"; string ret = ss.str(); return v8::String::New( ret.c_str() ); } v8::Handle numberLongInit( const v8::Arguments& args ) { - if (args.Length() != 1 && args.Length() != 3) { - return v8::ThrowException( v8::String::New( "NumberLong needs 1 or 3 arguments" ) ); + if (args.Length() != 0 && args.Length() != 1 && args.Length() != 3) { + return v8::ThrowException( v8::String::New( "NumberLong needs 0, 1 or 3 arguments" ) ); } v8::Handle it = args.This(); @@ -642,9 +649,33 @@ v8::Function* f = getNamedCons( "NumberLong" ); it = f->NewInstance(); } - - it->Set( v8::String::New( "floatApprox" ) , args[0] ); - if ( args.Length() == 3 ) { + + if ( args.Length() == 0 ) { + it->Set( v8::String::New( "floatApprox" ), v8::Number::New( 0 ) ); + } else if ( args.Length() == 1 ) { + if ( args[ 0 ]->IsNumber() ) { + it->Set( v8::String::New( "floatApprox" ), args[ 0 ] ); + } else { + v8::String::Utf8Value data( args[ 0 ] ); + string num = *data; + const char *numStr = num.c_str(); + long long n; + try { + n = parseLL( numStr ); + } catch ( const AssertionException & ) { + return v8::ThrowException( v8::String::New( "could not convert string to long long" ) ); + } + unsigned long long val = n; + if ( (long long)val == (long long)(double)(long long)(val) ) { + it->Set( v8::String::New( "floatApprox" ), v8::Number::New( (double)(long long)( val ) ) ); + } else { + it->Set( v8::String::New( "floatApprox" ), v8::Number::New( (double)(long long)( val ) ) ); + it->Set( v8::String::New( "top" ), v8::Integer::New( val >> 32 ) ); + it->Set( v8::String::New( "bottom" ), v8::Integer::New( (unsigned long)(val & 0x00000000ffffffff) ) ); + } + } + } else { + it->Set( v8::String::New( "floatApprox" ) , args[0] ); it->Set( v8::String::New( "top" ) , args[1] ); it->Set( v8::String::New( "bottom" ) , args[2] ); } @@ -687,10 +718,15 @@ v8::Handle it = args.This(); - long long val = numberLongVal( it ); - stringstream ss; - ss << val; + long long val = numberLongVal( it ); + const long long limit = 2LL << 30; + + if ( val <= -limit || limit <= val ) + ss << "NumberLong(\"" << val << "\")"; + else + ss << "NumberLong(" << val << ")"; + string ret = ss.str(); return v8::String::New( ret.c_str() ); } diff -Nru mongodb-1.4.4/scripting/v8_db.h mongodb-1.6.3/scripting/v8_db.h --- mongodb-1.4.4/scripting/v8_db.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/v8_db.h 2010-09-24 10:02:42.000000000 -0700 @@ -49,6 +49,7 @@ v8::Handle internalCursorCons(const v8::Arguments& args); v8::Handle internalCursorNext(const v8::Arguments& args); v8::Handle internalCursorHasNext(const v8::Arguments& args); + v8::Handle internalCursorObjsLeftInBatch(const v8::Arguments& args); // DB members diff -Nru mongodb-1.4.4/scripting/v8_utils.cpp mongodb-1.6.3/scripting/v8_utils.cpp --- mongodb-1.4.4/scripting/v8_utils.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/v8_utils.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -302,4 +302,11 @@ global->Set( v8::String::New( "_scopedThreadInject" ), FunctionTemplate::New( ScopedThreadInject )->GetFunction() ); } + Handle GCV8(const Arguments& args) 
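numberLongInit above now accepts zero arguments, a number, or a decimal string; strings go through parseLL, and the 32-bit top/bottom halves are stored only when the value is not exactly representable as a double. numberLongToString quotes any value at or beyond 2^31 so it round-trips through tojson. Sketch:

NumberLong( 5 );                           // displays as NumberLong(5)
var n = NumberLong( "9007199254740993" );  // 2^53+1, kept exact via top/bottom
print( n );                                // NumberLong("9007199254740993")
n.floatApprox;                             // lossy double approximation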
{ + Locker l; + while( V8::IdleNotification() ); + return v8::Undefined(); + } + + } diff -Nru mongodb-1.4.4/scripting/v8_utils.h mongodb-1.6.3/scripting/v8_utils.h --- mongodb-1.4.4/scripting/v8_utils.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/v8_utils.h 2010-09-24 10:02:42.000000000 -0700 @@ -29,6 +29,7 @@ v8::Handle Print(const v8::Arguments& args); v8::Handle Version(const v8::Arguments& args); + v8::Handle GCV8(const v8::Arguments& args); void ReportException(v8::TryCatch* handler); diff -Nru mongodb-1.4.4/scripting/v8_wrapper.cpp mongodb-1.6.3/scripting/v8_wrapper.cpp --- mongodb-1.4.4/scripting/v8_wrapper.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/scripting/v8_wrapper.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -395,31 +395,31 @@ void v8ToMongoElement( BSONObjBuilder & b , v8::Handle name , const string sname , v8::Handle value , int depth ){ if ( value->IsString() ){ - b.append( sname.c_str() , toSTLString( value ).c_str() ); + b.append( sname , toSTLString( value ).c_str() ); return; } if ( value->IsFunction() ){ - b.appendCode( sname.c_str() , toSTLString( value ).c_str() ); + b.appendCode( sname , toSTLString( value ).c_str() ); return; } if ( value->IsNumber() ){ if ( value->IsInt32() ) - b.append( sname.c_str(), int( value->ToInt32()->Value() ) ); + b.append( sname, int( value->ToInt32()->Value() ) ); else - b.append( sname.c_str() , value->ToNumber()->Value() ); + b.append( sname , value->ToNumber()->Value() ); return; } if ( value->IsArray() ){ BSONObj sub = v8ToMongo( value->ToObject() , depth ); - b.appendArray( sname.c_str() , sub ); + b.appendArray( sname , sub ); return; } if ( value->IsDate() ){ - b.appendDate( sname.c_str() , Date_t(v8::Date::Cast( *value )->NumberValue()) ); + b.appendDate( sname , Date_t( (unsigned long long)(v8::Date::Cast( *value )->NumberValue())) ); return; } @@ -434,15 +434,15 @@ if ( obj->InternalFieldCount() && obj->GetInternalField( 0 )->IsNumber() ) { switch( obj->GetInternalField( 0 )->ToInt32()->Value() ) { // NOTE Uint32's Value() gave me a linking error, so going with this instead case Timestamp: - b.appendTimestamp( sname.c_str(), - Date_t( obj->Get( v8::String::New( "t" ) )->ToNumber()->Value() ), - obj->Get( v8::String::New( "i" ) )->ToInt32()->Value() ); + b.appendTimestamp( sname, + Date_t( (unsigned long long)(obj->Get( v8::String::New( "t" ) )->ToNumber()->Value() )), + obj->Get( v8::String::New( "i" ) )->ToInt32()->Value() ); return; case MinKey: - b.appendMinKey( sname.c_str() ); + b.appendMinKey( sname ); return; case MaxKey: - b.appendMaxKey( sname.c_str() ); + b.appendMaxKey( sname ); return; default: assert( "invalid internal field" == 0 ); @@ -453,13 +453,13 @@ s = s.substr( 1 ); string r = s.substr( 0 , s.rfind( "/" ) ); string o = s.substr( s.rfind( "/" ) + 1 ); - b.appendRegex( sname.c_str() , r.c_str() , o.c_str() ); + b.appendRegex( sname , r.c_str() , o.c_str() ); } else if ( value->ToObject()->GetPrototype()->IsObject() && value->ToObject()->GetPrototype()->ToObject()->HasRealNamedProperty( v8::String::New( "isObjectId" ) ) ){ OID oid; oid.init( toSTLString( value ) ); - b.appendOID( sname.c_str() , &oid ); + b.appendOID( sname , &oid ); } else if ( !value->ToObject()->GetHiddenValue( v8::String::New( "__NumberLong" ) ).IsEmpty() ) { // TODO might be nice to potentially speed this up with an indexed internal @@ -475,42 +475,42 @@ (unsigned)( it->Get( v8::String::New( "bottom" ) )->ToInt32()->Value() ); } - b.append( sname.c_str(), val ); + b.append( sname, val ); } else if ( 
!value->ToObject()->GetHiddenValue( v8::String::New( "__DBPointer" ) ).IsEmpty() ) { OID oid; oid.init( toSTLString( value->ToObject()->Get( v8::String::New( "id" ) ) ) ); string ns = toSTLString( value->ToObject()->Get( v8::String::New( "ns" ) ) ); - b.appendDBRef( sname.c_str(), ns.c_str(), oid ); + b.appendDBRef( sname, ns.c_str(), oid ); } else if ( !value->ToObject()->GetHiddenValue( v8::String::New( "__BinData" ) ).IsEmpty() ) { int len = obj->Get( v8::String::New( "len" ) )->ToInt32()->Value(); v8::String::Utf8Value data( obj->Get( v8::String::New( "data" ) ) ); const char *dataArray = *data; assert( data.length() == len ); - b.appendBinData( sname.c_str(), + b.appendBinData( sname, len, mongo::BinDataType( obj->Get( v8::String::New( "type" ) )->ToInt32()->Value() ), dataArray ); } else { BSONObj sub = v8ToMongo( value->ToObject() , depth ); - b.append( sname.c_str() , sub ); + b.append( sname , sub ); } return; } if ( value->IsBoolean() ){ - b.appendBool( sname.c_str() , value->ToBoolean()->Value() ); + b.appendBool( sname , value->ToBoolean()->Value() ); return; } else if ( value->IsUndefined() ){ - b.appendUndefined( sname.c_str() ); + b.appendUndefined( sname ); return; } else if ( value->IsNull() ){ - b.appendNull( sname.c_str() ); + b.appendNull( sname ); return; } diff -Nru mongodb-1.4.4/shell/collection.js mongodb-1.6.3/shell/collection.js --- mongodb-1.4.4/shell/collection.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/collection.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,6 @@ -// collection.js - +// collection.js - DBCollection support in the mongo shell +// db.colName is a DBCollection object +// or db["colName"] if ( ( typeof DBCollection ) == "undefined" ){ DBCollection = function( mongo , db , shortName , fullName ){ @@ -26,44 +27,50 @@ return this._shortName; } -DBCollection.prototype.help = function() { +DBCollection.prototype.help = function () { + var shortName = this.getName(); print("DBCollection help"); - print("\tdb.foo.count()"); - print("\tdb.foo.dataSize()"); - print("\tdb.foo.distinct( key ) - eg. db.foo.distinct( 'x' )"); - print("\tdb.foo.drop() drop the collection"); - print("\tdb.foo.dropIndex(name)"); - print("\tdb.foo.dropIndexes()"); - print("\tdb.foo.ensureIndex(keypattern,options) - options should be an object with these possible fields: name, unique, dropDups"); - print("\tdb.foo.reIndex()"); - print("\tdb.foo.find( [query] , [fields]) - first parameter is an optional query filter. second parameter is optional set of fields to return."); - print("\t e.g. db.foo.find( { x : 77 } , { name : 1 , x : 1 } )"); - print("\tdb.foo.find(...).count()"); - print("\tdb.foo.find(...).limit(n)"); - print("\tdb.foo.find(...).skip(n)"); - print("\tdb.foo.find(...).sort(...)"); - print("\tdb.foo.findOne([query])"); - print("\tdb.foo.findAndModify( { update : ... , remove : bool [, query: {}, sort: {}, 'new': false] } )"); - print("\tdb.foo.getDB() get DB object associated with collection"); - print("\tdb.foo.getIndexes()"); - print("\tdb.foo.group( { key : ..., initial: ..., reduce : ...[, cond: ...] 
} )"); - print("\tdb.foo.mapReduce( mapFunction , reduceFunction , )"); - print("\tdb.foo.remove(query)"); - print("\tdb.foo.renameCollection( newName , ) renames the collection."); - print("\tdb.foo.runCommand( name , ) runs a db command with the given name where the 1st param is the colleciton name" ); - print("\tdb.foo.save(obj)"); - print("\tdb.foo.stats()"); - print("\tdb.foo.storageSize() - includes free space allocated to this collection"); - print("\tdb.foo.totalIndexSize() - size in bytes of all the indexes"); - print("\tdb.foo.totalSize() - storage allocated for all data and indexes"); - print("\tdb.foo.update(query, object[, upsert_bool, multi_bool])"); - print("\tdb.foo.validate() - SLOW"); - print("\tdb.foo.getShardVersion() - only for use with sharding"); + print("\tdb." + shortName + ".find().help() - show DBCursor help"); + print("\tdb." + shortName + ".count()"); + print("\tdb." + shortName + ".dataSize()"); + print("\tdb." + shortName + ".distinct( key ) - eg. db." + shortName + ".distinct( 'x' )"); + print("\tdb." + shortName + ".drop() drop the collection"); + print("\tdb." + shortName + ".dropIndex(name)"); + print("\tdb." + shortName + ".dropIndexes()"); + print("\tdb." + shortName + ".ensureIndex(keypattern,options) - options should be an object with these possible fields: name, unique, dropDups"); + print("\tdb." + shortName + ".reIndex()"); + print("\tdb." + shortName + ".find( [query] , [fields]) - first parameter is an optional query filter. second parameter is optional set of fields to return."); + print("\t e.g. db." + shortName + ".find( { x : 77 } , { name : 1 , x : 1 } )"); + print("\tdb." + shortName + ".find(...).count()"); + print("\tdb." + shortName + ".find(...).limit(n)"); + print("\tdb." + shortName + ".find(...).skip(n)"); + print("\tdb." + shortName + ".find(...).sort(...)"); + print("\tdb." + shortName + ".findOne([query])"); + print("\tdb." + shortName + ".findAndModify( { update : ... , remove : bool [, query: {}, sort: {}, 'new': false] } )"); + print("\tdb." + shortName + ".getDB() get DB object associated with collection"); + print("\tdb." + shortName + ".getIndexes()"); + print("\tdb." + shortName + ".group( { key : ..., initial: ..., reduce : ...[, cond: ...] } )"); + print("\tdb." + shortName + ".mapReduce( mapFunction , reduceFunction , )"); + print("\tdb." + shortName + ".remove(query)"); + print("\tdb." + shortName + ".renameCollection( newName , ) renames the collection."); + print("\tdb." + shortName + ".runCommand( name , ) runs a db command with the given name where the first param is the collection name"); + print("\tdb." + shortName + ".save(obj)"); + print("\tdb." + shortName + ".stats()"); + print("\tdb." + shortName + ".storageSize() - includes free space allocated to this collection"); + print("\tdb." + shortName + ".totalIndexSize() - size in bytes of all the indexes"); + print("\tdb." + shortName + ".totalSize() - storage allocated for all data and indexes"); + print("\tdb." + shortName + ".update(query, object[, upsert_bool, multi_bool])"); + print("\tdb." + shortName + ".validate() - SLOW"); + print("\tdb." 
+ shortName + ".getShardVersion() - only for use with sharding"); + return __magicNoPrint; } DBCollection.prototype.getFullName = function(){ return this._fullName; } +DBCollection.prototype.getMongo = function(){ + return this._db.getMongo(); +} DBCollection.prototype.getDB = function(){ return this._db; } @@ -166,8 +173,8 @@ this._lastID = obj._id; } -DBCollection.prototype.remove = function( t ){ - this._mongo.remove( this._fullName , this._massageObject( t ) ); +DBCollection.prototype.remove = function( t , justOne ){ + this._mongo.remove( this._fullName , this._massageObject( t ) , justOne ? true : false ); } DBCollection.prototype.update = function( query , obj , upsert , multi ){ @@ -333,7 +340,7 @@ var ret = this._db.runCommand( cmd ); if ( ! ret.ok ){ if (ret.errmsg == "No matching object found"){ - return {}; + return null; } throw "findAndModifyFailed failed: " + tojson( ret.errmsg ); } @@ -351,8 +358,10 @@ res.valid = false; - if ( res.result ){ - var str = "-" + tojson( res.result ); + var raw = res.result || res.raw; + + if ( raw ){ + var str = "-" + tojson( raw ); res.valid = ! ( str.match( /exception/ ) || str.match( /corrupt/ ) ); var p = /lastExtentSize:(\d+)/; @@ -572,6 +581,3 @@ DBCollection.prototype.tojson = DBCollection.prototype.toString; DBCollection.prototype.shellPrint = DBCollection.prototype.toString; - - - diff -Nru mongodb-1.4.4/shell/db.js mongodb-1.6.3/shell/db.js --- mongodb-1.4.4/shell/db.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/db.js 2010-09-24 10:02:42.000000000 -0700 @@ -259,7 +259,7 @@ print("\tdb.commandHelp(name) returns the help for the command"); print("\tdb.copyDatabase(fromdb, todb, fromhost)"); print("\tdb.createCollection(name, { size : ..., capped : ..., max : ... } )"); - print("\tdb.currentOp() displays the current operation in the db" ); + print("\tdb.currentOp() displays the current operation in the db"); print("\tdb.dropDatabase()"); print("\tdb.eval(func, args) run code server-side"); print("\tdb.getCollection(cname) same as db['cname'] or db.cname"); @@ -271,10 +271,12 @@ print("\tdb.getName()"); print("\tdb.getPrevError()"); print("\tdb.getProfilingLevel()"); - print("\tdb.getReplicationInfo()"); - print("\tdb.getSisterDB(name) get the db at the same server as this onew"); - print("\tdb.killOp(opid) kills the current operation in the db" ); - print("\tdb.printCollectionStats()" ); + print("\tdb.getReplicationInfo()"); + print("\tdb.getSisterDB(name) get the db at the same server as this one"); + print("\tdb.isMaster() check replica primary status"); + print("\tdb.killOp(opid) kills the current operation in the db"); + print("\tdb.listCommands() lists all the db commands"); + print("\tdb.printCollectionStats()"); print("\tdb.printReplicationInfo()"); print("\tdb.printSlaveReplicationInfo()"); print("\tdb.printShardingStatus()"); @@ -286,7 +288,10 @@ print("\tdb.setProfilingLevel(level,) 0=off 1=slow 2=all"); print("\tdb.shutdownServer()"); print("\tdb.stats()"); - print("\tdb.version() current version of the server" ); + print("\tdb.version() current version of the server"); + print("\tdb.getMongo().setSlaveOk() allow queries on a replication slave server"); + + return __magicNoPrint; } DB.prototype.printCollectionStats = function(){ @@ -309,9 +314,10 @@ *

  *  <p>Levels :</p>
  *   <ul>
  *    <li>0=off</li>
- *    <li>1=log very slow (>100ms) operations</li>
+ *    <li>1=log very slow operations; optional argument slowms specifies slowness threshold</li>
  *    <li>2=log all</li>
  *  </ul>
  * @param {String} level Desired level of profiling
+ * @param {String} slowms For slow logging, query duration that counts as slow (default 100ms)
  * @return SOMETHING_FIXME or null on error
  */
 DB.prototype.setProfilingLevel = function(level,slowms) { @@ -471,14 +477,21 @@ return this.runCommand( { forceerror : 1 } ); } -DB.prototype.getLastError = function(){ - var res = this.runCommand( { getlasterror : 1 } ); +DB.prototype.getLastError = function( w , wtimeout ){ + var res = this.getLastErrorObj( w , wtimeout ); if ( ! res.ok ) throw "getlasterror failed: " + tojson( res ); return res.err; } -DB.prototype.getLastErrorObj = function(){ - var res = this.runCommand( { getlasterror : 1 } ); +DB.prototype.getLastErrorObj = function( w , wtimeout ){ + var cmd = { getlasterror : 1 }; + if ( w ){ + cmd.w = w; + if ( wtimeout ) + cmd.wtimeout = wtimeout; + } + var res = this.runCommand( cmd ); + if ( ! res.ok ) throw "getlasterror failed: " + tojson( res ); return res; @@ -502,17 +515,17 @@ var nsLength = this._name.length + 1; - this.getCollection( "system.namespaces" ).find().sort({name:1}).forEach( - function(z){ - var name = z.name; - - if ( name.indexOf( "$" ) >= 0 && name != "local.oplog.$main" ) - return; - - all.push( name.substring( nsLength ) ); - } - ); - return all; + var c = this.getCollection( "system.namespaces" ).find(); + while ( c.hasNext() ){ + var name = c.next().name; + + if ( name.indexOf( "$" ) >= 0 && name.indexOf( ".oplog.$" ) < 0 ) + continue; + + all.push( name.substring( nsLength ) ); + } + + return all.sort(); } DB.prototype.tojson = function(){ @@ -521,7 +534,9 @@ DB.prototype.toString = function(){ return this._name; -} +} + +DB.prototype.isMaster = function () { return this.runCommand("isMaster"); } DB.prototype.currentOp = function(){ return db.$cmd.sys.inprog.findOne(); @@ -615,13 +630,19 @@ DB.prototype.printSlaveReplicationInfo = function() { function g(x) { + assert( x , "how could this be null (printSlaveReplicationInfo gx)" ) print("source: " + x.host); - var st = new Date( DB.tsToSeconds( x.syncedTo ) * 1000 ); - var now = new Date(); - print("syncedTo: " + st.toString() ); - var ago = (now-st)/1000; - var hrs = Math.round(ago/36)/100; - print(" = " + Math.round(ago) + "secs ago (" + hrs + "hrs)"); + if ( x.syncedTo ){ + var st = new Date( DB.tsToSeconds( x.syncedTo ) * 1000 ); + var now = new Date(); + print("\t syncedTo: " + st.toString() ); + var ago = (now-st)/1000; + var hrs = Math.round(ago/36)/100; + print("\t\t = " + Math.round(ago) + "secs ago (" + hrs + "hrs)"); + } + else { + print( "\t doing initial sync" ); + } } var L = this.getSisterDB("local"); if( L.sources.count() == 0 ) { @@ -639,10 +660,39 @@ return this._adminCommand( "serverStatus" ); } +DB.prototype.serverCmdLineOpts = function(){ + return this._adminCommand( "getCmdLineOpts" ); +} + DB.prototype.version = function(){ return this.serverBuildInfo().version; } +DB.prototype.listCommands = function(){ + var x = this.runCommand( "listCommands" ); + for ( var name in x.commands ){ + var c = x.commands[name]; + + var s = name + ": "; + + switch ( c.lockType ){ + case -1: s += "read-lock"; break; + case 0: s += "no-lock"; break; + case 1: s += "write-lock"; break; + default: s += c.lockType; + } + + if (c.adminOnly) s += " adminOnly "; + if (c.adminOnly) s += " slaveOk "; + + s += "\n "; + s += c.help.replace(/\n/g, '\n '); + s += "\n"; + + print( s ); + } +} + DB.prototype.printShardingStatus = function(){ printShardingStatus( this.getSisterDB( "config" ) ); } diff -Nru 
mongodb-1.4.4/shell/dbshell.cpp mongodb-1.6.3/shell/dbshell.cpp --- mongodb-1.4.4/shell/dbshell.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/dbshell.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,9 +15,15 @@ * limitations under the License. */ - +#include "pch.h" #include +#if defined(_WIN32) +# if defined(USE_READLINE) +# define USE_READLINE_STATIC +# endif +#endif + #ifdef USE_READLINE #include #include @@ -30,18 +36,87 @@ #include "../util/unittest.h" #include "../db/cmdline.h" #include "utils.h" +#include "../util/password.h" +#include "../util/version.h" +#include "../util/goodies.h" using namespace std; using namespace boost::filesystem; +using mongo::BSONObj; +using mongo::BSONObjBuilder; +using mongo::BSONObjIterator; +using mongo::BSONElement; + string historyFile; bool gotInterrupted = 0; bool inMultiLine = 0; +static volatile bool atPrompt = false; // can eval before getting to prompt +bool autoKillOp = false; + -#if defined(USE_READLINE) && !defined(__freebsd__) && !defined(_WIN32) +#if defined(USE_READLINE) && !defined(__freebsd__) && !defined(__openbsd__) && !defined(_WIN32) #define CTRLC_HANDLE #endif +mongo::Scope * shellMainScope; + +void generateCompletions( const string& prefix , vector& all ){ + if ( prefix.find( '"' ) != string::npos ) + return; + shellMainScope->exec( "shellAutocomplete( \"" + prefix + "\" );" , "autocomplete help" , false , true , false ); + + BSONObjBuilder b; + shellMainScope->append( b , "" , "__autocomplete__" ); + BSONObj res = b.obj(); + BSONObj arr = res.firstElement().Obj(); + + BSONObjIterator i(arr); + while ( i.more() ){ + BSONElement e = i.next(); + all.push_back( e.String() ); + } + +} + +#ifdef USE_READLINE +static char** completionHook(const char* text , int start ,int end ){ + static map m; + + vector all; + + if ( start == 0 ){ + generateCompletions( string(text,end) , all ); + } + + if ( all.size() == 0 ){ + rl_bind_key('\t',0); + return 0; + } + + string longest = all[0]; + for ( vector::iterator i=all.begin(); i!=all.end(); ++i ){ + string s = *i; + for ( unsigned j=0; j> yn; + + if (yn != 'y' && yn != 'Y') + return; + } + + vector< string > uris; for( map< const void*, string >::iterator i = mongo::shellUtils::_allMyUris.begin(); i != mongo::shellUtils::_allMyUris.end(); ++i ) uris.push_back( i->second ); @@ -95,6 +195,7 @@ } void quitNicely( int sig ){ + mongo::goingAway = true; if ( sig == SIGINT && inMultiLine ){ gotInterrupted = 1; return; @@ -105,14 +206,28 @@ shellHistoryDone(); exit(0); } +#else +void quitNicely( int sig ){ + mongo::goingAway = true; + //killOps(); + shellHistoryDone(); + exit(0); +} #endif char * shellReadline( const char * prompt , int handlesigint = 0 ){ + atPrompt = true; #ifdef USE_READLINE + rl_bind_key('\t',rl_complete); + + #ifdef CTRLC_HANDLE - if ( ! handlesigint ) - return readline( prompt ); + if ( ! handlesigint ){ + char* ret = readline( prompt ); + atPrompt = false; + return ret; + } if ( setjmp( jbuf ) ){ gotInterrupted = 1; sigrelse(SIGINT); @@ -124,13 +239,16 @@ char * ret = readline( prompt ); signal( SIGINT , quitNicely ); + atPrompt = false; return ret; #else - printf("%s", prompt); + printf("%s", prompt); cout.flush(); char * buf = new char[1024]; char * l = fgets( buf , 1024 , stdin ); int len = strlen( buf ); - buf[len-1] = 0; + if ( len ) + buf[len-1] = 0; + atPrompt = false; return l; #endif } @@ -173,8 +291,8 @@ if ( url.find( "." 
) != string::npos ) return url + "/test"; - if ( url.find( ":" ) != string::npos && - isdigit( url[url.find(":")+1] ) ) + if ( url.rfind( ":" ) != string::npos && + isdigit( url[url.rfind(":")+1] ) ) return url + "/test"; } return url; @@ -191,6 +309,10 @@ string newurl = host; if ( port.size() > 0 ) newurl += ":" + port; + else if (host.find(':') == string::npos){ + // need to add port with IPv6 addresses + newurl += ":27017"; + } newurl += "/" + url; @@ -283,7 +405,12 @@ } } +namespace mongo { + extern bool isShell; +} + int _main(int argc, char* argv[]) { + mongo::isShell = true; setupSignals(); mongo::shellUtils::RecordMyLocation( argv[ 0 ] ); @@ -314,15 +441,18 @@ ("host", po::value(&dbhost), "server to connect to") ("eval", po::value(&script), "evaluate javascript") ("username,u", po::value(&username), "username for authentication") - ("password,p", po::value(&password), "password for authentication") + ("password,p", new mongo::PasswordValue(&password), + "password for authentication") ("help,h", "show this usage information") ("version", "show version information") + ("ipv6", "enable IPv6 support (disabled by default)") ; hidden_options.add_options() ("dbaddress", po::value(), "dbaddress") ("files", po::value< vector >(), "files") ("nokillop", "nokillop") // for testing, kill op will also be disabled automatically if the tests starts a mongo program + ("autokillop", "autokillop") // for testing, will kill op without prompting ; positional_options.add("dbaddress", 1); @@ -350,6 +480,16 @@ return mongo::EXIT_BADOPTIONS; } + // hide password from ps output + for (int i=0; i < (argc-1); ++i){ + if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--password")){ + char* arg = argv[i+1]; + while (*arg){ + *arg++ = 'x'; + } + } + } + if (params.count("shell")) { runShell = true; } @@ -373,6 +513,9 @@ if (params.count("nokillop")) { mongo::shellUtils::_nokillop = true; } + if (params.count("autokillop")) { + autoKillOp = true; + } /* This is a bit confusing, here are the rules: * @@ -395,6 +538,9 @@ } } } + if (params.count("ipv6")){ + mongo::enableIPv6(); + } if ( ! mongo::cmdLine.quiet ) cout << "MongoDB shell version: " << mongo::versionString << endl; @@ -402,7 +548,7 @@ mongo::UnitTest::runTests(); if ( !nodb ) { // connect to db - if ( ! mongo::cmdLine.quiet ) cout << "url: " << url << endl; + //if ( ! mongo::cmdLine.quiet ) cout << "url: " << url << endl; stringstream ss; if ( mongo::cmdLine.quiet ) @@ -411,6 +557,11 @@ mongo::shellUtils::_dbConnect = ss.str(); + if ( params.count( "password" ) + && ( password.empty() ) ) { + password = mongo::askPassword(); + } + if ( username.size() && password.size() ){ stringstream ss; ss << "if ( ! db.auth( \"" << username << "\" , \"" << password << "\" ) ){ throw 'login failed'; }"; @@ -423,6 +574,10 @@ mongo::ScriptEngine::setup(); mongo::globalScriptEngine->setScopeInitCallback( mongo::shellUtils::initScope ); auto_ptr< mongo::Scope > scope( mongo::globalScriptEngine->newScope() ); + shellMainScope = scope.get(); + + if( runShell ) + cout << "type \"help\" for help" << endl; if ( !script.empty() ) { mongo::shellUtils::MongoProgramScope s; @@ -452,8 +607,6 @@ shellHistoryInit(); - cout << "type \"help\" for help" << endl; - //v8::Handle shellHelper = baseContext_->Global()->Get( v8::String::New( "shellHelper" ) )->ToObject(); while ( 1 ){ @@ -510,8 +663,8 @@ if ( ! 
wascmd ){ try { - scope->exec( code.c_str() , "(shell)" , false , true , false ); - scope->exec( "shellPrintHelper( __lastres__ );" , "(shell2)" , true , true , false ); + if ( scope->exec( code.c_str() , "(shell)" , false , true , false ) ) + scope->exec( "shellPrintHelper( __lastres__ );" , "(shell2)" , true , true , false ); } catch ( std::exception& e ){ cout << "error:" << e.what() << endl; @@ -525,6 +678,7 @@ shellHistoryDone(); } + mongo::goingAway = true; return 0; } diff -Nru mongodb-1.4.4/shell/mongo.js mongodb-1.6.3/shell/mongo.js --- mongodb-1.4.4/shell/mongo.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/mongo.js 2010-09-24 10:02:42.000000000 -0700 @@ -56,8 +56,9 @@ } Mongo.prototype.toString = function(){ - return "mongo connection to " + this.host; + return "connection to " + this.host; } +Mongo.prototype.tojson = Mongo.prototype.toString; connect = function( url , user , pass ){ chatty( "connecting to: " + url ) @@ -65,7 +66,7 @@ if ( user && ! pass ) throw "you specified a user and not a password. either you need a password, or you're using the old connect api"; - var idx = url.indexOf( "/" ); + var idx = url.lastIndexOf( "/" ); var db; diff -Nru mongodb-1.4.4/shell/mongo_vstudio.cpp mongodb-1.6.3/shell/mongo_vstudio.cpp --- mongodb-1.4.4/shell/mongo_vstudio.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/mongo_vstudio.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,6 @@ const char * jsconcatcode = "__quiet = false;\n" + "__magicNoPrint = { __magicNoPrint : 1111 }\n" "chatty = function(s){\n" "if ( ! __quiet )\n" "print( s );}\n" @@ -9,14 +10,19 @@ "if ( tojson( a ) == tojson( b ) )\n" "return true;\n" "return false;}\n" - "doassert = function( msg ){\n" - "print( \"assert: \" + msg );\n" + "doassert = function (msg) {\n" + "if (msg.indexOf(\"assert\") == 0)\n" + "print(msg);\n" + "else\n" + "print(\"assert: \" + msg);\n" "throw msg;}\n" "assert = function( b , msg ){\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" "if ( b )\n" "return;\n" - "doassert( \"assert failed : \" + msg );}\n" + "doassert( msg == undefined ? 
\"assert failed\" : \"assert failed : \" + msg );}\n" + "assert.automsg = function( b ) {\n" + "assert( eval( b ), b );}\n" "assert._debug = false;\n" "assert.eq = function( a , b , msg ){\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" @@ -25,11 +31,29 @@ "if ( ( a != null && b != null ) && friendlyEqual( a , b ) )\n" "return;\n" "doassert( \"[\" + tojson( a ) + \"] != [\" + tojson( b ) + \"] are not equal : \" + msg );}\n" + "assert.eq.automsg = function( a, b ) {\n" + "assert.eq( eval( a ), eval( b ), \"[\" + a + \"] != [\" + b + \"]\" );}\n" "assert.neq = function( a , b , msg ){\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" "if ( a != b )\n" "return;\n" "doassert( \"[\" + a + \"] != [\" + b + \"] are equal : \" + msg );}\n" + "assert.repeat = function( f, msg, timeout, interval ) {\n" + "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" + "var start = new Date();\n" + "timeout = timeout || 30000;\n" + "interval = interval || 200;\n" + "var last;\n" + "while( 1 ) {\n" + "if ( typeof( f ) == \"string\" ){\n" + "if ( eval( f ) )\n" + "return;}\n" + "else {\n" + "if ( f() )\n" + "return;}\n" + "if ( ( new Date() ).getTime() - start.getTime() > timeout )\n" + "break;\n" + "sleep( interval );}}\n" "assert.soon = function( f, msg, timeout, interval ) {\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" "var start = new Date();\n" @@ -53,6 +77,8 @@ "catch ( e ){\n" "return e;}\n" "doassert( \"did not throw exception: \" + msg );}\n" + "assert.throws.automsg = function( func, params ) {\n" + "assert.throws( func, params, func.toString() );}\n" "assert.commandWorked = function( res , msg ){\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" "if ( res.ok == 1 )\n" @@ -78,11 +104,24 @@ "if ( a > b )\n" "return;\n" "doassert( a + \" is not greater than \" + b + \" : \" + msg );}\n" - "assert.close = function( a , b , msg ){\n" - "var diff = Math.abs( (a-b)/((a+b)/2) );\n" - "if ( diff < .001 )\n" + "assert.lte = function( a , b , msg ){\n" + "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" + "if ( a <= b )\n" "return;\n" - "doassert( a + \" is not close to \" + b + \" diff: \" + diff + \" : \" + msg );}\n" + "doassert( a + \" is not less than or eq \" + b + \" : \" + msg );}\n" + "assert.gte = function( a , b , msg ){\n" + "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" + "if ( a >= b )\n" + "return;\n" + "doassert( a + \" is not greater than or eq \" + b + \" : \" + msg );}\n" + "assert.close = function( a , b , msg , places ){\n" + "if (places === undefined) {\n" + "places = 4;}\n" + "if (Math.round((a - b) * Math.pow(10, places)) === 0) {\n" + "return;}\n" + "doassert( a + \" is not equal to \" + b + \" within \" + places +\n" + "\" places, diff: \" + (a-b) + \" : \" + msg );\n" + "};\n" "Object.extend = function( dst , src , deep ){\n" "for ( var k in src ){\n" "var v = src[k];\n" @@ -182,6 +221,10 @@ "if ( !( i in o.__proto__ && o[ i ] === o.__proto__[ i ] ) ) {\n" "ret.push( i );}}\n" "return ret;}\n" + "if ( ! NumberLong.prototype ) {\n" + "NumberLong.prototype = {}}\n" + "NumberLong.prototype.tojson = function() {\n" + "return this.toString();}\n" "if ( ! 
ObjectId.prototype )\n" "ObjectId.prototype = {}\n" "ObjectId.prototype.toString = function(){\n" @@ -189,6 +232,10 @@ "ObjectId.prototype.tojson = function(){\n" "return \"ObjectId(\\\"\" + this.str + \"\\\")\";}\n" "ObjectId.prototype.isObjectId = true;\n" + "ObjectId.prototype.getTimestamp = function(){\n" + "return new Date(parseInt(this.toString().slice(0,8), 16)*1000);}\n" + "ObjectId.prototype.equals = function( other){\n" + "return this.str == other.str;}\n" "if ( typeof( DBPointer ) != \"undefined\" ){\n" "DBPointer.prototype.fetch = function(){\n" "assert( this.ns , \"need a ns\" );\n" @@ -216,10 +263,14 @@ "else {\n" "print( \"warning: no DBRef\" );}\n" "if ( typeof( BinData ) != \"undefined\" ){\n" - "BinData.prototype.tojson = function(){\n" - "return \"BinData type: \" + this.type + \" len: \" + this.len;}}\n" + "BinData.prototype.tojson = function () {\n" + "//return \"BinData type: \" + this.type + \" len: \" + this.len;\n" + "return this.toString();}}\n" "else {\n" - "print( \"warning: no BinData\" );}\n" + "print( \"warning: no BinData class\" );}\n" + "if ( typeof( UUID ) != \"undefined\" ){\n" + "UUID.prototype.tojson = function () {\n" + "return this.toString();}}\n" "if ( typeof _threadInject != \"undefined\" ){\n" "print( \"fork() available!\" );\n" "Thread = function(){\n" @@ -291,7 +342,9 @@ "\"jstests/indexb.js\",\n" "\"jstests/profile1.js\",\n" "\"jstests/mr3.js\",\n" - "\"jstests/apitest_db.js\"] );\n" + "\"jstests/indexh.js\",\n" + "\"jstests/apitest_db.js\",\n" + "\"jstests/evalb.js\"] );\n" "var serialTestsArr = [ \"jstests/fsync.js\",\n" "\"jstests/fsync2.js\" ];\n" "var serialTests = makeKeys( serialTestsArr );\n" @@ -301,8 +354,8 @@ "var i = 0;\n" "files.forEach(\n" "function(x) {\n" - "if ( /_runner/.test(x.name) ||\n" - "/_lodeRunner/.test(x.name) ||\n" + "if ( ( /[\\/\\\\]_/.test(x.name) ) ||\n" + "( ! /\\.js$/.test(x.name ) ) ||\n" "( x.name in skipTests ) ||\n" "( x.name in serialTests ) ||\n" "! /\\.js$/.test(x.name ) ){\n" @@ -356,6 +409,8 @@ "var nFailed = 0;\n" "runners.forEach( function( x ) { if( !x.returnData() ) { ++nFailed; } } );\n" "assert.eq( 0, nFailed, msg );}}\n" + "tojsononeline = function( x ){\n" + "return tojson( x , \" \" , true );}\n" "tojson = function( x, indent , nolint ){\n" "if ( x === null )\n" "return \"null\";\n" @@ -363,14 +418,24 @@ "return \"undefined\";\n" "if (!indent)\n" "indent = \"\";\n" - "switch ( typeof x ){\n" + "switch ( typeof x ) {\n" "case \"string\": {\n" "var s = \"\\\"\";\n" "for ( var i=0; i= 1ms\");\n" - "print( \"\\t\" + \"use set curent database to \" );\n" - "print( \"\\t\" + \"db.help() help on DB methods\");\n" - "print( \"\\t\" + \"db.foo.help() help on collection methods\");\n" - "print( \"\\t\" + \"db.foo.find() list objects in collection foo\" );\n" - "print( \"\\t\" + \"db.foo.find( { a : 1 } ) list objects in foo where a == 1\" );\n" - "print( \"\\t\" + \"it result of the last line evaluated; use to further iterate\");}\n" "shellHelper.use = function( dbname ){\n" "db = db.getMongo().getDB( dbname );\n" "print( \"switched to db \" + db.getName() );}\n" @@ -546,7 +608,8 @@ "this._data[k].forEach( function(z){ all.push( z.value ); } );}\n" "return all;}\n" "if ( typeof( gc ) == \"undefined\" ){\n" - "gc = function(){}}\n" + "gc = function(){\n" + "print( \"warning: using noop gc()\" );}}\n" "Math.sigFig = function( x , N ){\n" "if ( ! 
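getTimestamp above decodes the creation time the server embeds in the first four bytes of every ObjectId. Sketch:

var id = new ObjectId();
id.getTimestamp();  // Date parsed from the leading 8 hex characters
id.equals( id );    // true; compares the underlying hex strings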
N ){\n" "N = 3;}\n" @@ -568,6 +631,117 @@ "for ( var i in inprog ) {\n" "if ( uris[ u ] == inprog[ i ].client ) {\n" "db.killOp( inprog[ i ].opid );}}}}\n" + "Geo = {};\n" + "Geo.distance = function( a , b ){\n" + "var ax = null;\n" + "var ay = null;\n" + "var bx = null;\n" + "var by = null;\n" + "for ( var key in a ){\n" + "if ( ax == null )\n" + "ax = a[key];\n" + "else if ( ay == null )\n" + "ay = a[key];}\n" + "for ( var key in b ){\n" + "if ( bx == null )\n" + "bx = b[key];\n" + "else if ( by == null )\n" + "by = b[key];}\n" + "return Math.sqrt( Math.pow( by - ay , 2 ) +\n" + "Math.pow( bx - ax , 2 ) );}\n" + "rs = function () { return \"try rs.help()\"; }\n" + "rs.help = function () {\n" + "print(\"\\trs.status() { replSetGetStatus : 1 } checks repl set status\");\n" + "print(\"\\trs.initiate() { replSetInitiate : null } initiates set with default settings\");\n" + "print(\"\\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg\");\n" + "print(\"\\trs.add(hostportstr) add a new member to the set with default attributes\");\n" + "print(\"\\trs.add(membercfgobj) add a new member to the set with extra attributes\");\n" + "print(\"\\trs.addArb(hostportstr) add a new member which is arbiterOnly:true\");\n" + "print(\"\\trs.stepDown() step down as primary (momentarily)\");\n" + "print(\"\\trs.conf() return configuration from local.system.replset\");\n" + "print(\"\\trs.slaveOk() shorthand for db.getMongo().setSlaveOk()\");\n" + "print();\n" + "print(\"\\tdb.isMaster() check who is primary\");\n" + "print();\n" + "print(\"\\tsee also http://:28017/_replSet for additional diagnostic info\");}\n" + "rs.slaveOk = function () { return db.getMongo().setSlaveOk(); }\n" + "rs.status = function () { return db._adminCommand(\"replSetGetStatus\"); }\n" + "rs.isMaster = function () { return db.isMaster(); }\n" + "rs.initiate = function (c) { return db._adminCommand({ replSetInitiate: c }); }\n" + "rs.add = function (hostport, arb) {\n" + "var cfg = hostport;\n" + "var local = db.getSisterDB(\"local\");\n" + "assert(local.system.replset.count() <= 1, \"error: local.system.replset has unexpected contents\");\n" + "var c = local.system.replset.findOne();\n" + "assert(c, \"no config object retrievable from local.system.replset\");\n" + "c.version++;\n" + "var max = 0;\n" + "for (var i in c.members)\n" + "if (c.members[i]._id > max) max = c.members[i]._id;\n" + "if (isString(hostport)) {\n" + "cfg = { _id: max + 1, host: hostport };\n" + "if (arb)\n" + "cfg.arbiterOnly = true;}\n" + "c.members.push(cfg);\n" + "return db._adminCommand({ replSetReconfig: c });}\n" + "rs.stepDown = function () { return db._adminCommand({ replSetStepDown:true}); }\n" + "rs.addArb = function (hn) { return this.add(hn, true); }\n" + "rs.conf = function () { return db.getSisterDB(\"local\").system.replset.findOne(); }\n" + "help = shellHelper.help = function (x) {\n" + "if (x == \"connect\") {\n" + "print(\"\\nNormally one specifies the server on the mongo shell command line. 
Run mongo --help to see those options.\");\n" + "print(\"Additional connections may be opened:\\n\");\n" + "print(\" var x = new Mongo('host[:port]');\");\n" + "print(\" var mydb = x.getDB('mydb');\");\n" + "print(\" or\");\n" + "print(\" var mydb = connect('host[:port]/mydb');\");\n" + "print(\"\\nNote: the REPL prompt only auto-reports getLastError() for the shell command line connection.\\n\");\n" + "return;}\n" + "if (x == \"misc\") {\n" + "print(\"\\tb = new BinData(subtype,base64str) create a BSON BinData value\");\n" + "print(\"\\tb.subtype() the BinData subtype (0..255)\");\n" + "print(\"\\tb.length() length of the BinData data in bytes\");\n" + "print(\"\\tb.hex() the data as a hex encoded string\");\n" + "print(\"\\tb.base64() the data as a base 64 encoded string\");\n" + "print(\"\\tb.toString()\");\n" + "return;}\n" + "if (x == \"admin\") {\n" + "print(\"\\tls([path]) list files\");\n" + "print(\"\\tpwd() returns current directory\");\n" + "print(\"\\tlistFiles([path]) returns file list\");\n" + "print(\"\\thostname() returns name of this host\");\n" + "print(\"\\tcat(fname) returns contents of text file as a string\");\n" + "print(\"\\tremoveFile(f) delete a file\");\n" + "print(\"\\tload(jsfilename) load and execute a .js file\");\n" + "print(\"\\trun(program[, args...]) spawn a program and wait for its completion\");\n" + "print(\"\\tsleep(m) sleep m milliseconds\");\n" + "print(\"\\tgetMemInfo() diagnostic\");\n" + "return;}\n" + "if (x == \"test\") {\n" + "print(\"\\tstartMongodEmpty(args) DELETES DATA DIR and then starts mongod\");\n" + "print(\"\\t returns a connection to the new server\");\n" + "print(\"\\tstartMongodTest() DELETES DATA DIR\");\n" + "print(\"\\t automatically picks port #s starting at 27000 and increasing\");\n" + "print(\"\\t or you can specify the port as the first arg\");\n" + "print(\"\\t dir is /data/db// if not specified as the 2nd arg\");\n" + "print(\"\\t returns a connection to the new server\");\n" + "return;}\n" + "print(\"\\t\" + \"db.help() help on db methods\");\n" + "print(\"\\t\" + \"db.mycoll.help() help on collection methods\");\n" + "print(\"\\t\" + \"rs.help() help on replica set methods\");\n" + "print(\"\\t\" + \"help connect connecting to a db help\");\n" + "print(\"\\t\" + \"help admin administrative help\");\n" + "print(\"\\t\" + \"help misc misc things to know\");\n" + "print();\n" + "print(\"\\t\" + \"show dbs show database names\");\n" + "print(\"\\t\" + \"show collections show collections in current database\");\n" + "print(\"\\t\" + \"show users show users in current database\");\n" + "print(\"\\t\" + \"show profile show most recent system.profile entries with time >= 1ms\");\n" + "print(\"\\t\" + \"use set current database\");\n" + "print(\"\\t\" + \"db.foo.find() list objects in collection foo\");\n" + "print(\"\\t\" + \"db.foo.find( { a : 1 } ) list objects in foo where a == 1\");\n" + "print(\"\\t\" + \"it result of the last line evaluated; use to further iterate\");\n" + "print(\"\\t\" + \"exit quit the mongo shell\");}\n" "if ( typeof DB == \"undefined\" ){\n" "DB = function( mongo , name ){\n" "this._mongo = mongo;\n" @@ -679,7 +853,7 @@ "print(\"\\tdb.commandHelp(name) returns the help for the command\");\n" "print(\"\\tdb.copyDatabase(fromdb, todb, fromhost)\");\n" "print(\"\\tdb.createCollection(name, { size : ..., capped : ..., max : ... 
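The "misc" help above lists the BinData accessors; a quick sketch (the payload is just "Hello" in base64):

    var b = new BinData(0, "SGVsbG8=");  // subtype 0 = generic binary
    print(b.length());                   // 5 bytes
    print(b.hex());                      // 48656c6c6f
    print(b.base64());                   // SGVsbG8=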
} )\");\n" - "print(\"\\tdb.currentOp() displays the current operation in the db\" );\n" + "print(\"\\tdb.currentOp() displays the current operation in the db\");\n" "print(\"\\tdb.dropDatabase()\");\n" "print(\"\\tdb.eval(func, args) run code server-side\");\n" "print(\"\\tdb.getCollection(cname) same as db['cname'] or db.cname\");\n" @@ -692,9 +866,11 @@ "print(\"\\tdb.getPrevError()\");\n" "print(\"\\tdb.getProfilingLevel()\");\n" "print(\"\\tdb.getReplicationInfo()\");\n" - "print(\"\\tdb.getSisterDB(name) get the db at the same server as this onew\");\n" - "print(\"\\tdb.killOp(opid) kills the current operation in the db\" );\n" - "print(\"\\tdb.printCollectionStats()\" );\n" + "print(\"\\tdb.getSisterDB(name) get the db at the same server as this one\");\n" + "print(\"\\tdb.isMaster() check replica primary status\");\n" + "print(\"\\tdb.killOp(opid) kills the current operation in the db\");\n" + "print(\"\\tdb.listCommands() lists all the db commands\");\n" + "print(\"\\tdb.printCollectionStats()\");\n" "print(\"\\tdb.printReplicationInfo()\");\n" "print(\"\\tdb.printSlaveReplicationInfo()\");\n" "print(\"\\tdb.printShardingStatus()\");\n" @@ -702,10 +878,13 @@ "print(\"\\tdb.repairDatabase()\");\n" "print(\"\\tdb.resetError()\");\n" "print(\"\\tdb.runCommand(cmdObj) run a database command. if cmdObj is a string, turns it into { cmdObj : 1 }\");\n" + "print(\"\\tdb.serverStatus()\");\n" "print(\"\\tdb.setProfilingLevel(level,) 0=off 1=slow 2=all\");\n" "print(\"\\tdb.shutdownServer()\");\n" "print(\"\\tdb.stats()\");\n" - "print(\"\\tdb.version() current version of the server\" );}\n" + "print(\"\\tdb.version() current version of the server\");\n" + "print(\"\\tdb.getMongo().setSlaveOk() allow queries on a replication slave server\");\n" + "return __magicNoPrint;}\n" "DB.prototype.printCollectionStats = function(){\n" "var mydb = this;\n" "this.getCollectionNames().forEach(\n" @@ -776,13 +955,18 @@ "return this.runCommand( { reseterror : 1 } );}\n" "DB.prototype.forceError = function(){\n" "return this.runCommand( { forceerror : 1 } );}\n" - "DB.prototype.getLastError = function(){\n" - "var res = this.runCommand( { getlasterror : 1 } );\n" + "DB.prototype.getLastError = function( w , wtimeout ){\n" + "var res = this.getLastErrorObj( w , wtimeout );\n" "if ( ! res.ok )\n" "throw \"getlasterror failed: \" + tojson( res );\n" "return res.err;}\n" - "DB.prototype.getLastErrorObj = function(){\n" - "var res = this.runCommand( { getlasterror : 1 } );\n" + "DB.prototype.getLastErrorObj = function( w , wtimeout ){\n" + "var cmd = { getlasterror : 1 };\n" + "if ( w ){\n" + "cmd.w = w;\n" + "if ( wtimeout )\n" + "cmd.wtimeout = wtimeout;}\n" + "var res = this.runCommand( cmd );\n" "if ( ! 
res.ok )\n" "throw \"getlasterror failed: \" + tojson( res );\n" "return res;}\n" @@ -797,18 +981,18 @@ "DB.prototype.getCollectionNames = function(){\n" "var all = [];\n" "var nsLength = this._name.length + 1;\n" - "this.getCollection( \"system.namespaces\" ).find().sort({name:1}).forEach(\n" - "function(z){\n" - "var name = z.name;\n" - "if ( name.indexOf( \"$\" ) >= 0 && name != \"local.oplog.$main\" )\n" - "return;\n" + "var c = this.getCollection( \"system.namespaces\" ).find();\n" + "while ( c.hasNext() ){\n" + "var name = c.next().name;\n" + "if ( name.indexOf( \"$\" ) >= 0 && name.indexOf( \".oplog.$\" ) < 0 )\n" + "continue;\n" "all.push( name.substring( nsLength ) );}\n" - ");\n" - "return all;}\n" + "return all.sort();}\n" "DB.prototype.tojson = function(){\n" "return this._name;}\n" "DB.prototype.toString = function(){\n" "return this._name;}\n" + "DB.prototype.isMaster = function () { return this.runCommand(\"isMaster\"); }\n" "DB.prototype.currentOp = function(){\n" "return db.$cmd.sys.inprog.findOne();}\n" "DB.prototype.currentOP = DB.prototype.currentOp;\n" @@ -865,13 +1049,17 @@ "print(\"now: \" + result.now);}\n" "DB.prototype.printSlaveReplicationInfo = function() {\n" "function g(x) {\n" + "assert( x , \"how could this be null (printSlaveReplicationInfo gx)\" )\n" "print(\"source: \" + x.host);\n" + "if ( x.syncedTo ){\n" "var st = new Date( DB.tsToSeconds( x.syncedTo ) * 1000 );\n" "var now = new Date();\n" - "print(\"syncedTo: \" + st.toString() );\n" + "print(\"\\t syncedTo: \" + st.toString() );\n" "var ago = (now-st)/1000;\n" "var hrs = Math.round(ago/36)/100;\n" - "print(\" = \" + Math.round(ago) + \"secs ago (\" + hrs + \"hrs)\");}\n" + "print(\"\\t\\t = \" + Math.round(ago) + \"secs ago (\" + hrs + \"hrs)\");}\n" + "else {\n" + "print( \"\\t doing initial sync\" );}}\n" "var L = this.getSisterDB(\"local\");\n" "if( L.sources.count() == 0 ) {\n" "print(\"local.sources is empty; is this db a --slave?\");\n" @@ -881,8 +1069,26 @@ "return this._adminCommand( \"buildinfo\" );}\n" "DB.prototype.serverStatus = function(){\n" "return this._adminCommand( \"serverStatus\" );}\n" + "DB.prototype.serverCmdLineOpts = function(){\n" + "return this._adminCommand( \"getCmdLineOpts\" );}\n" "DB.prototype.version = function(){\n" "return this.serverBuildInfo().version;}\n" + "DB.prototype.listCommands = function(){\n" + "var x = this.runCommand( \"listCommands\" );\n" + "for ( var name in x.commands ){\n" + "var c = x.commands[name];\n" + "var s = name + \": \";\n" + "switch ( c.lockType ){\n" + "case -1: s += \"read-lock\"; break;\n" + "case 0: s += \"no-lock\"; break;\n" + "case 1: s += \"write-lock\"; break;\n" + "default: s += c.lockType;}\n" + "if (c.adminOnly) s += \" adminOnly \";\n" + "if (c.adminOnly) s += \" slaveOk \";\n" + "s += \"\\n \";\n" + "s += c.help.replace(/\\n/g, '\\n ');\n" + "s += \"\\n\";\n" + "print( s );}}\n" "DB.prototype.printShardingStatus = function(){\n" "printShardingStatus( this.getSisterDB( \"config\" ) );}\n" "if ( typeof Mongo == \"undefined\" ){\n" @@ -921,12 +1127,13 @@ "var c = ns.substring( idx + 1 );\n" "return this.getDB( db ).getCollection( c );}\n" "Mongo.prototype.toString = function(){\n" - "return \"mongo connection to \" + this.host;}\n" + "return \"connection to \" + this.host;}\n" + "Mongo.prototype.tojson = Mongo.prototype.toString;\n" "connect = function( url , user , pass ){\n" "chatty( \"connecting to: \" + url )\n" "if ( user && ! pass )\n" "throw \"you specified a user and not a password. 
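One apparent slip in the new listCommands above: c.adminOnly is tested twice, so the slaveOk flag can never be reported. The second test was presumably meant to read:

    if (c.adminOnly) s += " adminOnly ";
    if (c.slaveOk) s += " slaveOk ";     // corrected guess: check slaveOk, not adminOnly again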
either you need a password, or you're using the old connect api\";\n" - "var idx = url.indexOf( \"/\" );\n" + "var idx = url.lastIndexOf( \"/\" );\n" "var db;\n" "if ( idx < 0 )\n" "db = new Mongo().getDB( url );\n" @@ -1014,18 +1221,25 @@ "this._batchSize = batchSize || 0;\n" "this._cursor = null;\n" "this._numReturned = 0;\n" - "this._special = false;}\n" + "this._special = false;\n" + "this._prettyShell = false;}\n" "print( \"DBQuery probably won't have array access \" );}\n" - "DBQuery.prototype.help = function(){\n" - "print( \"DBQuery help\" );\n" - "print( \"\\t.sort( {...} )\" )\n" - "print( \"\\t.limit( n )\" )\n" - "print( \"\\t.skip( n )\" )\n" - "print( \"\\t.count() - total # of objects matching query, ignores skip,limit\" )\n" - "print( \"\\t.size() - total # of objects cursor would return skip,limit effect this\" )\n" - "print( \"\\t.explain()\" )\n" - "print( \"\\t.forEach( func )\" )\n" - "print( \"\\t.map( func )\" )}\n" + "DBQuery.prototype.help = function () {\n" + "print(\"find() modifiers\")\n" + "print(\"\\t.sort( {...} )\")\n" + "print(\"\\t.limit( n )\")\n" + "print(\"\\t.skip( n )\")\n" + "print(\"\\t.count() - total # of objects matching query, ignores skip,limit\")\n" + "print(\"\\t.size() - total # of objects cursor would return, honors skip,limit\")\n" + "print(\"\\t.explain([verbose])\")\n" + "print(\"\\t.hint(...)\")\n" + "print(\"\\t.showDiskLoc() - adds a $diskLoc field to each returned object\")\n" + "print(\"\\nCursor methods\");\n" + "print(\"\\t.forEach( func )\")\n" + "print(\"\\t.print() - output to console in full pretty format\")\n" + "print(\"\\t.map( func )\")\n" + "print(\"\\t.hasNext()\")\n" + "print(\"\\t.next()\")}\n" "DBQuery.prototype.clone = function(){\n" "var q = new DBQuery( this._mongo , this._db , this._collection , this._ns ,\n" "this._query , this._fields ,\n" @@ -1077,6 +1291,12 @@ "throw \"error: \" + tojson( ret );\n" "this._numReturned++;\n" "return ret;}\n" + "DBQuery.prototype.objsLeftInBatch = function(){\n" + "this._exec();\n" + "var ret = this._cursor.objsLeftInBatch();\n" + "if ( ret.$err )\n" + "throw \"error: \" + tojson( ret );\n" + "return ret;}\n" "DBQuery.prototype.toArray = function(){\n" "if ( this._arr )\n" "return this._arr;\n" @@ -1131,6 +1351,8 @@ "return this._addSpecial( \"$min\" , min );}\n" "DBQuery.prototype.max = function( max ) {\n" "return this._addSpecial( \"$max\" , max );}\n" + "DBQuery.prototype.showDiskLoc = function() {\n" + "return this._addSpecial( \"$showDiskLoc\" , true);}\n" "DBQuery.prototype.forEach = function( func ){\n" "while ( this.hasNext() )\n" "func( this.next() );}\n" @@ -1141,21 +1363,29 @@ "return a;}\n" "DBQuery.prototype.arrayAccess = function( idx ){\n" "return this.toArray()[idx];}\n" - "DBQuery.prototype.explain = function(){\n" + "DBQuery.prototype.explain = function (verbose) {\n" + "/* verbose=true --> include allPlans, oldPlan fields */\n" "var n = this.clone();\n" "n._ensureSpecial();\n" "n._query.$explain = true;\n" "n._limit = n._limit * -1;\n" - "return n.next();}\n" + "var e = n.next();\n" + "if (!verbose) {\n" + "delete e.allPlans;\n" + "delete e.oldPlan;}\n" + "return e;}\n" "DBQuery.prototype.snapshot = function(){\n" "this._ensureSpecial();\n" "this._query.$snapshot = true;\n" "return this;}\n" + "DBQuery.prototype.pretty = function(){\n" + "this._prettyShell = true;\n" + "return this;}\n" "DBQuery.prototype.shellPrint = function(){\n" "try {\n" "var n = 0;\n" - "while ( this.hasNext() && n < 20 ){\n" - "var s = tojson( this.next() , \"\" , true );\n" + "while ( 
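explain() now strips the bulky allPlans and oldPlan fields unless the caller opts in, which keeps interactive output readable:

    db.foo.find({ a: 1 }).explain();     // concise: allPlans/oldPlan deleted
    db.foo.find({ a: 1 }).explain(true); // verbose: the full candidate-plan list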
this.hasNext() && n < DBQuery.shellBatchSize ){\n" + "var s = this._prettyShell ? tojson( this.next() ) : tojson( this.next() , \"\" , true );\n" "print( s );\n" "n++;}\n" "if ( this.hasNext() ){\n" @@ -1167,6 +1397,8 @@ "print( e );}}\n" "DBQuery.prototype.toString = function(){\n" "return \"DBQuery: \" + this._ns + \" -> \" + tojson( this.query );}\n" + "DBQuery.shellBatchSize = 20;\n" + "// or db[\"colName\"]\n" "if ( ( typeof DBCollection ) == \"undefined\" ){\n" "DBCollection = function( mongo , db , shortName , fullName ){\n" "this._mongo = mongo;\n" @@ -1182,39 +1414,42 @@ "assert( this._mongo , \"no mongo in DBCollection\" );}\n" "DBCollection.prototype.getName = function(){\n" "return this._shortName;}\n" - "DBCollection.prototype.help = function() {\n" + "DBCollection.prototype.help = function () {\n" + "var shortName = this.getName();\n" "print(\"DBCollection help\");\n" - "print(\"\\tdb.foo.count()\");\n" - "print(\"\\tdb.foo.dataSize()\");\n" - "print(\"\\tdb.foo.distinct( key ) - eg. db.foo.distinct( 'x' )\");\n" - "print(\"\\tdb.foo.drop() drop the collection\");\n" - "print(\"\\tdb.foo.dropIndex(name)\");\n" - "print(\"\\tdb.foo.dropIndexes()\");\n" - "print(\"\\tdb.foo.ensureIndex(keypattern,options) - options should be an object with these possible fields: name, unique, dropDups\");\n" - "print(\"\\tdb.foo.reIndex()\");\n" - "print(\"\\tdb.foo.find( [query] , [fields]) - first parameter is an optional query filter. second parameter is optional set of fields to return.\");\n" - "print(\"\\t e.g. db.foo.find( { x : 77 } , { name : 1 , x : 1 } )\");\n" - "print(\"\\tdb.foo.find(...).count()\");\n" - "print(\"\\tdb.foo.find(...).limit(n)\");\n" - "print(\"\\tdb.foo.find(...).skip(n)\");\n" - "print(\"\\tdb.foo.find(...).sort(...)\");\n" - "print(\"\\tdb.foo.findOne([query])\");\n" - "print(\"\\tdb.foo.findAndModify( { update : ... , remove : bool [, query: {}, sort: {}, 'new': false] } )\");\n" - "print(\"\\tdb.foo.getDB() get DB object associated with collection\");\n" - "print(\"\\tdb.foo.getIndexes()\");\n" - "print(\"\\tdb.foo.group( { key : ..., initial: ..., reduce : ...[, cond: ...] } )\");\n" - "print(\"\\tdb.foo.mapReduce( mapFunction , reduceFunction , )\");\n" - "print(\"\\tdb.foo.remove(query)\");\n" - "print(\"\\tdb.foo.renameCollection( newName , ) renames the collection.\");\n" - "print(\"\\tdb.foo.runCommand( name , ) runs a db command with the given name where the 1st param is the colleciton name\" );\n" - "print(\"\\tdb.foo.save(obj)\");\n" - "print(\"\\tdb.foo.stats()\");\n" - "print(\"\\tdb.foo.storageSize() - includes free space allocated to this collection\");\n" - "print(\"\\tdb.foo.totalIndexSize() - size in bytes of all the indexes\");\n" - "print(\"\\tdb.foo.totalSize() - storage allocated for all data and indexes\");\n" - "print(\"\\tdb.foo.update(query, object[, upsert_bool, multi_bool])\");\n" - "print(\"\\tdb.foo.validate() - SLOW\");\n" - "print(\"\\tdb.foo.getShardVersion() - only for use with sharding\");}\n" + "print(\"\\tdb.\" + shortName + \".find().help() - show DBCursor help\");\n" + "print(\"\\tdb.\" + shortName + \".count()\");\n" + "print(\"\\tdb.\" + shortName + \".dataSize()\");\n" + "print(\"\\tdb.\" + shortName + \".distinct( key ) - eg. 
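The hard-coded 20-document page in shellPrint becomes the overridable DBQuery.shellBatchSize, and pretty() flips a cursor to multi-line output. Both are plain assignments in an interactive session:

    DBQuery.shellBatchSize = 50;         // "it" now pages 50 documents at a time
    db.foo.find().pretty();              // one indented, multi-line object per document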
db.\" + shortName + \".distinct( 'x' )\");\n" + "print(\"\\tdb.\" + shortName + \".drop() drop the collection\");\n" + "print(\"\\tdb.\" + shortName + \".dropIndex(name)\");\n" + "print(\"\\tdb.\" + shortName + \".dropIndexes()\");\n" + "print(\"\\tdb.\" + shortName + \".ensureIndex(keypattern,options) - options should be an object with these possible fields: name, unique, dropDups\");\n" + "print(\"\\tdb.\" + shortName + \".reIndex()\");\n" + "print(\"\\tdb.\" + shortName + \".find( [query] , [fields]) - first parameter is an optional query filter. second parameter is optional set of fields to return.\");\n" + "print(\"\\t e.g. db.\" + shortName + \".find( { x : 77 } , { name : 1 , x : 1 } )\");\n" + "print(\"\\tdb.\" + shortName + \".find(...).count()\");\n" + "print(\"\\tdb.\" + shortName + \".find(...).limit(n)\");\n" + "print(\"\\tdb.\" + shortName + \".find(...).skip(n)\");\n" + "print(\"\\tdb.\" + shortName + \".find(...).sort(...)\");\n" + "print(\"\\tdb.\" + shortName + \".findOne([query])\");\n" + "print(\"\\tdb.\" + shortName + \".findAndModify( { update : ... , remove : bool [, query: {}, sort: {}, 'new': false] } )\");\n" + "print(\"\\tdb.\" + shortName + \".getDB() get DB object associated with collection\");\n" + "print(\"\\tdb.\" + shortName + \".getIndexes()\");\n" + "print(\"\\tdb.\" + shortName + \".group( { key : ..., initial: ..., reduce : ...[, cond: ...] } )\");\n" + "print(\"\\tdb.\" + shortName + \".mapReduce( mapFunction , reduceFunction , )\");\n" + "print(\"\\tdb.\" + shortName + \".remove(query)\");\n" + "print(\"\\tdb.\" + shortName + \".renameCollection( newName , ) renames the collection.\");\n" + "print(\"\\tdb.\" + shortName + \".runCommand( name , ) runs a db command with the given name where the first param is the collection name\");\n" + "print(\"\\tdb.\" + shortName + \".save(obj)\");\n" + "print(\"\\tdb.\" + shortName + \".stats()\");\n" + "print(\"\\tdb.\" + shortName + \".storageSize() - includes free space allocated to this collection\");\n" + "print(\"\\tdb.\" + shortName + \".totalIndexSize() - size in bytes of all the indexes\");\n" + "print(\"\\tdb.\" + shortName + \".totalSize() - storage allocated for all data and indexes\");\n" + "print(\"\\tdb.\" + shortName + \".update(query, object[, upsert_bool, multi_bool])\");\n" + "print(\"\\tdb.\" + shortName + \".validate() - SLOW\");\n" + "print(\"\\tdb.\" + shortName + \".getShardVersion() - only for use with sharding\");\n" + "return __magicNoPrint;}\n" "DBCollection.prototype.getFullName = function(){\n" "return this._fullName;}\n" "DBCollection.prototype.getDB = function(){\n" @@ -1280,7 +1515,7 @@ "for (var key in tmp){\n" "obj[key] = tmp[key];}}\n" "this._mongo.insert( this._fullName , obj );\n" - "return obj._id;}\n" + "this._lastID = obj._id;}\n" "DBCollection.prototype.remove = function( t ){\n" "this._mongo.remove( this._fullName , this._massageObject( t ) );}\n" "DBCollection.prototype.update = function( query , obj , upsert , multi ){\n" @@ -1391,7 +1626,7 @@ "var ret = this._db.runCommand( cmd );\n" "if ( ! 
ret.ok ){\n" "if (ret.errmsg == \"No matching object found\"){\n" - "return {};}\n" + "return null;}\n" "throw \"findAndModifyFailed failed: \" + tojson( ret.errmsg );}\n" "return ret.value;}\n" "DBCollection.prototype.renameCollection = function( newName , dropTarget ){\n" @@ -1401,8 +1636,9 @@ "DBCollection.prototype.validate = function() {\n" "var res = this._db.runCommand( { validate: this.getName() } );\n" "res.valid = false;\n" - "if ( res.result ){\n" - "var str = \"-\" + tojson( res.result );\n" + "var raw = res.result || res.raw;\n" + "if ( raw ){\n" + "var str = \"-\" + tojson( raw );\n" "res.valid = ! ( str.match( /exception/ ) || str.match( /corrupt/ ) );\n" "var p = /lastExtentSize:(\\d+)/;\n" "var r = p.exec( str );\n" @@ -1470,6 +1706,7 @@ "function( spec ){\n" "var coll = mydb.getCollection( shortName + \".$\" + spec.name );\n" "var mysize = coll.storageSize();\n" + "//print( coll + \"\\t\" + mysize + \"\\t\" + tojson( coll.validate() ) );\n" "total += coll.dataSize();}\n" ");\n" "return total;}\n" Binary files /tmp/eJqezWvDkt/mongodb-1.4.4/shell/msvc/mongo.ico and /tmp/oJOREzsx7Y/mongodb-1.6.3/shell/msvc/mongo.ico differ diff -Nru mongodb-1.4.4/shell/msvc/mongo.sln mongodb-1.6.3/shell/msvc/mongo.sln --- mongodb-1.4.4/shell/msvc/mongo.sln 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/shell/msvc/mongo.sln 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongo", "mongo.vcxproj", "{FE959BD8-8EE2-4555-AE59-9FA14FFD410E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {FE959BD8-8EE2-4555-AE59-9FA14FFD410E}.Debug|Win32.ActiveCfg = Debug|Win32 + {FE959BD8-8EE2-4555-AE59-9FA14FFD410E}.Debug|Win32.Build.0 = Debug|Win32 + {FE959BD8-8EE2-4555-AE59-9FA14FFD410E}.Release|Win32.ActiveCfg = Release|Win32 + {FE959BD8-8EE2-4555-AE59-9FA14FFD410E}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff -Nru mongodb-1.4.4/shell/msvc/mongo.vcxproj mongodb-1.6.3/shell/msvc/mongo.vcxproj --- mongodb-1.4.4/shell/msvc/mongo.vcxproj 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/shell/msvc/mongo.vcxproj 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,253 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {FE959BD8-8EE2-4555-AE59-9FA14FFD410E} + Win32Proj + mongo + + + + Application + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + true + \boost\lib\vs2010_32\;$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSdkDir)lib;$(FrameworkSDKDir)\lib + $(VCInstallDir)bin;$(WindowsSdkDir)bin\NETFX 4.0 Tools;$(WindowsSdkDir)bin;$(VSInstallDir)Common7\Tools\bin;$(VSInstallDir)Common7\tools;$(VSInstallDir)Common7\ide;$(ProgramFiles)\HTML Help Workshop;$(FrameworkSDKDir)\bin;$(MSBuildToolsPath32);$(VSInstallDir);$(SystemRoot)\SysWow64;$(FxCopDir);$(PATH); + ..\..\..\readline\include;..\..\..\js\src\;..\..\pcre-7.4;..\..\;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSdkDir)include;$(FrameworkSDKDir)\include + + + ..\..\..\readline\include;..\..\..\js\src\;..\..\pcre-7.4;..\..\;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSdkDir)include;$(FrameworkSDKDir)\include + false + 
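Back in the shell changes just above these MSVC project files: findAndModify now returns null rather than {} when nothing matches, so the result can be tested directly. Sketch with a placeholder query:

    var doc = db.foo.findAndModify({ query: { _id: 42 }, update: { $set: { n: 1 } } });
    if (doc === null)
        print("no matching object");     // 1.4.4 returned {} here; 1.6.3 returns null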
\boost\lib\vs2010_32\;$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSdkDir)lib;$(FrameworkSDKDir)\lib + + + + Use + Level3 + Disabled + USE_READLINE;XP_WIN;PCRE_STATIC;HAVE_CONFIG_H;OLDJS;MONGO_EXPOSE_MACROS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + \boost\ + pch.h + 4355;4800;4267;4244;%(DisableSpecificWarnings) + + + Console + true + ws2_32.lib;psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + Use + MaxSpeed + true + true + USE_READLINE;XP_WIN;_WIN32;PCRE_STATIC;HAVE_CONFIG_H;OLDJS;MONGO_EXPOSE_MACROS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + \boost\ + pch.h + true + MultiThreaded + 4355;4800;4267;4244;%(DisableSpecificWarnings) + + + Console + true + true + true + ws2_32.lib;psapi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + NotUsing + NotUsing + + + NotUsing + + + NotUsing + NotUsing + + + NotUsing + + + NotUsing + NotUsing + + + + + + + + + + + + + + + + + + NotUsing + + + + + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + + + + + + Create + pch.h + + + + + + + + NotUsing + NotUsing + + + NotUsing + NotUsing + + + + + + + + + + + + + + + + true + + + true + + + + + + + + + + + + + \ No newline at end of file diff -Nru mongodb-1.4.4/shell/msvc/mongo.vcxproj.filters mongodb-1.6.3/shell/msvc/mongo.vcxproj.filters --- mongodb-1.4.4/shell/msvc/mongo.vcxproj.filters 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/shell/msvc/mongo.vcxproj.filters 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,262 @@ + + + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {2a0d6120-434d-4732-ac31-2a7bf077f6ee} + + + {a1e59094-b70c-463a-8dc1-691efe337f14} + + + {2d0fd975-0cc9-43dc-ac8e-53cb8c3a0040} + + + {a33442e2-39da-4c70-8310-6de9fa70cd71} + + + {1044ce7b-72c4-4892-82c0-f46d8708a6ff} + + + {fc0f6c1a-9627-4254-9b5e-0bcb8b3257f3} + + + {30b62472-d7a7-4b8a-8a07-d7d341bc6252} + + + {291e0d72-13ca-42d7-b0fd-2e7b5f89639f} + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {473e7192-9f2a-47c5-ad95-e5b75d4f48f9} + + + {96e4c411-7ab4-4bcd-b7c6-a33059f5d492} + + + + + shell + + + util + + + util\concurrency + + + scripting + + + db + + + client + + + client + + + + shared source files + + + shell + + + shell + + + shared source files + + + shared source files + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + pcre + + + client + + + util + + + db + + + scripting + + + util + + + util + + + util + + + client + + + util + + + util + + + util + + + util + + + client + + + pcre + + + pcre + + + pcre + + + shell + + + shell + + + util + + + shell + + + shared source files + + + shared source files + + + shell + + + scripting + + + util + + + db + + + 
shell\generated_from_js + + + shell\generated_from_js + + + shared source files + + + + + + _js files + + + _js files + + + _js files + + + _js files + + + _js files + + + _js files + + + _js files + + + + + + + + + + db + + + + + Resource Files + + + \ No newline at end of file diff -Nru mongodb-1.4.4/shell/query.js mongodb-1.6.3/shell/query.js --- mongodb-1.4.4/shell/query.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/query.js 2010-09-24 10:02:42.000000000 -0700 @@ -17,21 +17,27 @@ this._cursor = null; this._numReturned = 0; this._special = false; + this._prettyShell = false; } print( "DBQuery probably won't have array access " ); } -DBQuery.prototype.help = function(){ - print( "DBQuery help" ); - print( "\t.sort( {...} )" ) - print( "\t.limit( n )" ) - print( "\t.skip( n )" ) - print( "\t.count() - total # of objects matching query, ignores skip,limit" ) - print( "\t.size() - total # of objects cursor would return skip,limit effect this" ) - print( "\t.explain()" ) - print( "\t.forEach( func )" ) - print( "\t.map( func )" ) - +DBQuery.prototype.help = function () { + print("find() modifiers") + print("\t.sort( {...} )") + print("\t.limit( n )") + print("\t.skip( n )") + print("\t.count() - total # of objects matching query, ignores skip,limit") + print("\t.size() - total # of objects cursor would return, honors skip,limit") + print("\t.explain([verbose])") + print("\t.hint(...)") + print("\t.showDiskLoc() - adds a $diskLoc field to each returned object") + print("\nCursor methods"); + print("\t.forEach( func )") + print("\t.print() - output to console in full pretty format") + print("\t.map( func )") + print("\t.hasNext()") + print("\t.next()") } DBQuery.prototype.clone = function(){ @@ -110,6 +116,16 @@ return ret; } +DBQuery.prototype.objsLeftInBatch = function(){ + this._exec(); + + var ret = this._cursor.objsLeftInBatch(); + if ( ret.$err ) + throw "error: " + tojson( ret ); + + return ret; +} + DBQuery.prototype.toArray = function(){ if ( this._arr ) return this._arr; @@ -197,6 +213,10 @@ return this._addSpecial( "$max" , max ); } +DBQuery.prototype.showDiskLoc = function() { + return this._addSpecial( "$showDiskLoc" , true); +} + DBQuery.prototype.forEach = function( func ){ while ( this.hasNext() ) func( this.next() ); @@ -213,12 +233,32 @@ return this.toArray()[idx]; } -DBQuery.prototype.explain = function(){ +DBQuery.prototype.explain = function (verbose) { + /* verbose=true --> include allPlans, oldPlan fields */ var n = this.clone(); n._ensureSpecial(); n._query.$explain = true; n._limit = n._limit * -1; - return n.next(); + var e = n.next(); + if (!verbose) { + delete e.allPlans; + delete e.oldPlan; + if (e.shards){ + for (var key in e.shards){ + var s = e.shards[key]; + if(s.length === undefined){ + delete s.allPlans; + delete s.oldPlan; + } else { + for (var i=0; i < s.length; i++){ + delete s[i].allPlans; + delete s[i].oldPlan; + } + } + } + } + } + return e; } DBQuery.prototype.snapshot = function(){ @@ -227,11 +267,16 @@ return this; } +DBQuery.prototype.pretty = function(){ + this._prettyShell = true; + return this; +} + DBQuery.prototype.shellPrint = function(){ try { var n = 0; - while ( this.hasNext() && n < 20 ){ - var s = tojson( this.next() , "" , true ); + while ( this.hasNext() && n < DBQuery.shellBatchSize ){ + var s = this._prettyShell ? 
tojson( this.next() ) : tojson( this.next() , "" , true ); print( s ); n++; } @@ -252,3 +297,5 @@ DBQuery.prototype.toString = function(){ return "DBQuery: " + this._ns + " -> " + tojson( this.query ); } + +DBQuery.shellBatchSize = 20; diff -Nru mongodb-1.4.4/shell/servers.js mongodb-1.6.3/shell/servers.js --- mongodb-1.4.4/shell/servers.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/servers.js 2010-09-24 10:02:42.000000000 -0700 @@ -53,38 +53,57 @@ return fullArgs; } -startMongodTest = function( port , dirname , restart ){ - var f = startMongod; - if ( restart ) +__nextPort = 27000; +startMongodTest = function (port, dirname, restart, extraOptions ) { + if (!port) + port = __nextPort++; + var f = startMongodEmpty; + if (restart) f = startMongodNoReset; - var conn = f.apply( null , [ - { - port : port , - dbpath : "/data/db/" + dirname , - noprealloc : "" , - smallfiles : "" , - oplogSize : "2" , - nohttpinterface : "" - } - ] - ); - conn.name = "localhost:" + port; + if (!dirname) + dirname = "" + port; // e.g., data/db/27000 + + var useHostname = false; + if (extraOptions) { + useHostname = extraOptions.useHostname; + delete extraOptions.useHostname; + } + + + var options = + { + port: port, + dbpath: "/data/db/" + dirname, + noprealloc: "", + smallfiles: "", + oplogSize: "2", + nohttpinterface: "" + }; + + if ( extraOptions ) + Object.extend( options , extraOptions ); + + var conn = f.apply(null, [ options ] ); + + conn.name = (useHostname ? getHostName() : "localhost") + ":" + port; return conn; } // Start a mongod instance and return a 'Mongo' object connected to it. // This function's arguments are passed as command line arguments to mongod. // The specified 'dbpath' is cleared if it exists, created if not. -startMongod = function(){ +startMongodEmpty = function () { + var args = createMongoArgs("mongod", arguments); - var args = createMongoArgs( "mongod" , arguments ); - - var dbpath = _parsePath.apply( null, args ); - resetDbpath( dbpath ); + var dbpath = _parsePath.apply(null, args); + resetDbpath(dbpath); - return startMongoProgram.apply( null, args ); + return startMongoProgram.apply(null, args); +} +startMongod = function () { + print("WARNING DELETES DATA DIRECTORY THIS IS FOR TESTING RENAME YOUR INVOCATION"); + return startMongodEmpty.apply(null, arguments); } - startMongodNoReset = function(){ var args = createMongoArgs( "mongod" , arguments ); return startMongoProgram.apply( null, args ); @@ -94,10 +113,11 @@ return startMongoProgram.apply( null, createMongoArgs( "mongos" , arguments ) ); } -// Start a mongo program instance (generally mongod or mongos) and return a -// 'Mongo' object connected to it. This function's first argument is the -// program name, and subsequent arguments to this function are passed as -// command line arguments to the program. +/* Start mongod or mongos and return a Mongo() object connected to there. + This function's first argument is "mongod" or "mongos" program name, \ + and subsequent arguments to this function are passed as + command line arguments to the program. +*/ startMongoProgram = function(){ var port = _parsePort.apply( null, arguments ); @@ -132,35 +152,79 @@ return 27017; } -ShardingTest = function( testName , numServers , verboseLevel , numMongos , otherParams ){ +/** + * otherParams can be: + * * useHostname to use the hostname (instead of localhost) + */ +ShardingTest = function( testName , numShards , verboseLevel , numMongos , otherParams ){ + this._testName = testName; + if ( ! 
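startMongodTest is now self-numbering: with no port it takes the next value of __nextPort (starting at 27000), derives the dbpath from the port, and merges an extraOptions object after stripping its useHostname flag. A harness sketch; the auth entry is a hypothetical extra mongod option:

    var conn1 = startMongodTest();       // port 27000, dbpath /data/db/27000
    var conn2 = startMongodTest(27100, "mydir", false,
                                { useHostname: true, auth: "" });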
otherParams ) otherParams = {} this._connections = []; - if ( otherParams.sync && numServers < 3 ) + if ( otherParams.sync && numShards < 3 ) throw "if you want sync, you need at least 3 servers"; - for ( var i=0; i " + tojsononeline( r.max ); +} + +ShardingTest.prototype.printChangeLog = function(){ + var s = this; + this.config.changelog.find().forEach( + function(z){ + var msg = z.server + "\t" + z.time + "\t" + z.what; + for ( i=z.what.length; i<15; i++ ) + msg += " "; + msg += " " + z.ns + "\t"; + if ( z.what == "split" ){ + msg += s._rangeToString( z.details.before ) + " -->> (" + s._rangeToString( z.details.left ) + "),(" + s._rangeToString( z.details.right ) + ")"; + } + else if (z.what == "multi-split" ){ + msg += s._rangeToString( z.details.before ) + " -->> (" + z.details.number + "/" + z.details.of + " " + s._rangeToString( z.details.chunk ) + ")"; + } + else { + msg += tojsononeline( z.details ); + } + + print( msg ) + } + ); + +} + ShardingTest.prototype.getChunksString = function( ns ){ var q = {} if ( ns ) q.ns = ns; - return Array.tojson( this.config.chunks.find( q ).toArray() , "\n" ); + + var s = ""; + this.config.chunks.find( q ).sort( { ns : 1 , min : 1 } ).forEach( + function(z){ + s += " " + z._id + "\t" + z.lastmod.t + "|" + z.lastmod.i + "\t" + tojson(z.min) + " -> " + tojson(z.max) + " " + z.shard + " " + z.ns + "\n"; + } + ); + + return s; } ShardingTest.prototype.printChunks = function( ns ){ @@ -258,10 +420,14 @@ out += " mongos " + c + " " + tojson( c.getCollection( ns ).getShardVersion() , " " , true ) + "\n"; } + out += this.getChunksString( ns ); + print( out ); } printShardingStatus = function( configDB ){ + if (configDB === undefined) + configDB = db.getSisterDB('config') var version = configDB.getCollection( "version" ).findOne(); if ( version == null ){ @@ -285,17 +451,81 @@ output( " databases:" ); configDB.databases.find().sort( { name : 1 } ).forEach( + function(db){ + output( "\t" + tojson(db,"",true) ); + + if (db.partitioned){ + configDB.collections.find( { _id : new RegExp( "^" + db._id + "\." ) } ).sort( { _id : 1 } ).forEach( + function( coll ){ + output("\t\t" + coll._id + " chunks:"); + configDB.chunks.find( { "ns" : coll._id } ).sort( { min : 1 } ).forEach( + function(chunk){ + output( "\t\t\t" + tojson( chunk.min ) + " -->> " + tojson( chunk.max ) + + " on : " + chunk.shard + " " + tojson( chunk.lastmod ) ); + } + ); + } + ) + } + } + ); + + print( raw ); +} + +printShardingSizes = function(){ + configDB = db.getSisterDB('config') + + var version = configDB.getCollection( "version" ).findOne(); + if ( version == null ){ + print( "not a shard db!" 
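printShardingStatus now defaults its argument, so it can be called bare from any database:

    printShardingStatus();   // same as printShardingStatus(db.getSisterDB("config"))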
); + return; + } + + var raw = ""; + var output = function(s){ + raw += s + "\n"; + } + output( "--- Sharding Status --- " ); + output( " sharding version: " + tojson( configDB.getCollection( "version" ).findOne() ) ); + + output( " shards:" ); + var shards = {}; + configDB.shards.find().forEach( function(z){ - output( "\t" + tojson(z,"",true) ); + shards[z._id] = new Mongo(z.host); + output( " " + tojson(z) ); + } + ); + + var saveDB = db; + output( " databases:" ); + configDB.databases.find().sort( { name : 1 } ).forEach( + function(db){ + output( "\t" + tojson(db,"",true) ); - output( "\t\tmy chunks" ); - - configDB.chunks.find( { "ns" : new RegExp( "^" + z.name ) } ).sort( { ns : 1 } ).forEach( - function(z){ - output( "\t\t\t" + z.ns + " " + tojson( z.min ) + " -->> " + tojson( z.max ) + - " on : " + z.shard + " " + tojson( z.lastmod ) ); - } - ); + if (db.partitioned){ + configDB.collections.find( { _id : new RegExp( "^" + db._id + "\." ) } ).sort( { _id : 1 } ).forEach( + function( coll ){ + output("\t\t" + coll._id + " chunks:"); + configDB.chunks.find( { "ns" : coll._id } ).sort( { min : 1 } ).forEach( + function(chunk){ + var mydb = shards[chunk.shard].getDB(db._id) + var out = mydb.runCommand({dataSize: coll._id, + keyPattern: coll.key, + min: chunk.min, + max: chunk.max }); + delete out.millis; + delete out.ok; + + output( "\t\t\t" + tojson( chunk.min ) + " -->> " + tojson( chunk.max ) + + " on : " + chunk.shard + " " + tojson( out ) ); + + } + ); + } + ) + } } ); @@ -316,6 +546,38 @@ return num; } + +ShardingTest.prototype.shardCounts = function( collName , dbName ){ + this.sync(); // we should sync since we're going directly to mongod here + dbName = dbName || "test"; + var counts = {} + for ( var i=0; i timeout) { + throw('[' + opts['desc'] + ']' + " timed out"); + } + } + + return result; +} + +ReplSetTest.prototype.initiate = function( cfg , initCmd , timeout ) { + var master = this.nodes[0].getDB("admin"); + var config = cfg || this.getReplSetConfig(); + var cmd = {}; + var cmdKey = initCmd || 'replSetInitiate'; + var timeout = timeout || 30000; + cmd[cmdKey] = config; + printjson(cmd); + + this.attempt({timeout: timeout, desc: "Initiate replica set"}, function() { + var result = master.runCommand(cmd); + printjson(result); + return result['ok'] == 1; + }); +} + +ReplSetTest.prototype.reInitiate = function() { + var master = this.nodes[0]; + var c = master.getDB("local")['system.replset'].findOne(); + var config = this.getReplSetConfig(); + config.version = c.version + 1; + this.initiate( config , 'replSetReconfig' ); +} + +ReplSetTest.prototype.awaitReplication = function() { + this.getMaster(); + + latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts'] + print(latest); + + this.attempt({context: this, timeout: 30000, desc: "awaiting replication"}, + function() { + var synced = true; + for(var i=0; i + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +# define SIGKILL 9 +#else +# include +# include +# include +# include +# include +#endif + +#include "../client/dbclient.h" +#include "../util/processinfo.h" +#include "utils.h" +#include "../util/text.h" +#include "../util/md5.hpp" + +extern const char * jsconcatcode_server; + +namespace mongo { +#ifdef _WIN32 + inline int close(int fd) { return _close(fd); } + inline int read(int fd, void* buf, size_t size) { return _read(fd, buf, size); } + inline int pipe(int fds[2]) { return _pipe(fds, 4096, 
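printShardingSizes measures each chunk with the dataSize command, issued directly against the shard that owns the chunk. The underlying call, with placeholder namespace and bounds; the result carries size and object counts plus millis and ok, which the printer deletes before output:

    db.runCommand({
        dataSize: "test.foo",            // namespace (placeholder)
        keyPattern: { x: 1 },            // the collection's shard key
        min: { x: 0 },                   // chunk lower bound (inclusive)
        max: { x: 100 }                  // chunk upper bound (exclusive)
    });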
_O_TEXT | _O_NOINHERIT); } +#endif + + namespace shellUtils { + + Scope* theScope = 0; + + std::string _dbConnect; + std::string _dbAuth; + + const char *argv0 = 0; + void RecordMyLocation( const char *_argv0 ) { argv0 = _argv0; } + + // helpers + + BSONObj makeUndefined() { + BSONObjBuilder b; + b.appendUndefined( "" ); + return b.obj(); + } + const BSONObj undefined_ = makeUndefined(); + + BSONObj encapsulate( const BSONObj &obj ) { + return BSON( "" << obj ); + } + + // real methods + + mongo::BSONObj JSSleep(const mongo::BSONObj &args){ + assert( args.nFields() == 1 ); + assert( args.firstElement().isNumber() ); + int ms = int( args.firstElement().number() ); + { + auto_ptr< ScriptEngine::Unlocker > u = globalScriptEngine->newThreadUnlocker(); + sleepmillis( ms ); + } + return undefined_; + } + + void goingAwaySoon(); + BSONObj Quit(const BSONObj& args) { + // If not arguments are given first element will be EOO, which + // converts to the integer value 0. + goingAwaySoon(); + int exit_code = int( args.firstElement().number() ); + ::exit(exit_code); + return undefined_; + } + + BSONObj JSGetMemInfo( const BSONObj& args ){ + ProcessInfo pi; + uassert( 10258 , "processinfo not supported" , pi.supported() ); + + BSONObjBuilder e; + e.append( "virtual" , pi.getVirtualMemorySize() ); + e.append( "resident" , pi.getResidentSize() ); + + BSONObjBuilder b; + b.append( "ret" , e.obj() ); + + return b.obj(); + } + + +#ifndef MONGO_SAFE_SHELL + + BSONObj listFiles(const BSONObj& _args){ + static BSONObj cd = BSON( "0" << "." ); + BSONObj args = _args.isEmpty() ? cd : _args; + + uassert( 10257 , "need to specify 1 argument to listFiles" , args.nFields() == 1 ); + + BSONObjBuilder lst; + + string rootname = args.firstElement().valuestrsafe(); + path root( rootname ); + stringstream ss; + ss << "listFiles: no such directory: " << rootname; + string msg = ss.str(); + uassert( 12581, msg.c_str(), boost::filesystem::exists( root ) ); + + directory_iterator end; + directory_iterator i( root); + + int num =0; + while ( i != end ){ + path p = *i; + BSONObjBuilder b; + b << "name" << p.string(); + b.appendBool( "isDirectory", is_directory( p ) ); + if ( ! is_directory( p ) ){ + try { + b.append( "size" , (double)file_size( p ) ); + } + catch ( ... 
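These C++ natives back ordinary shell globals; note that JSSleep releases the script-engine lock while sleeping, so other shell threads keep running. A shell-side view (getMemInfo's exact result shape is an assumption):

    sleep(500);                          // milliseconds; the JS lock is released meanwhile
    printjson(getMemInfo());             // virtual/resident process sizes (needs ProcessInfo support)
    listFiles(".").forEach(function (f) {
        print(f.name + (f.isDirectory ? "/" : "  " + f.size + " bytes"));
    });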
){ + i++; + continue; + } + } + + stringstream ss; + ss << num; + string name = ss.str(); + lst.append( name, b.done() ); + num++; + i++; + } + + BSONObjBuilder ret; + ret.appendArray( "", lst.done() ); + return ret.obj(); + } + + BSONObj ls(const BSONObj& args) { + BSONObj o = listFiles(args); + if( !o.isEmpty() ) { + for( BSONObj::iterator i = o.firstElement().Obj().begin(); i.more(); ) { + BSONObj f = i.next().Obj(); + cout << f["name"].String(); + if( f["isDirectory"].trueValue() ) cout << '/'; + cout << '\n'; + } + cout.flush(); + } + return BSONObj(); + } + + BSONObj cd(const BSONObj& args) { +#if defined(_WIN32) + std::wstring dir = toWideString( args.firstElement().String().c_str() ); + if( SetCurrentDirectory(dir.c_str()) ) + return BSONObj(); +#else + string dir = args.firstElement().String(); +/* if( chdir(dir.c_str) ) == 0 ) + return BSONObj(); + */ + if( 1 ) return BSON(""<<"implementation not done for posix"); +#endif + return BSON( "" << "change directory failed" ); + } + + BSONObj pwd(const BSONObj&) { + boost::filesystem::path p = boost::filesystem::current_path(); + return BSON( "" << p.string() ); + } + + BSONObj hostname(const BSONObj&) { + return BSON( "" << getHostName() ); + } + + static BSONElement oneArg(const BSONObj& args) { + uassert( 12597 , "need to specify 1 argument" , args.nFields() == 1 ); + return args.firstElement(); + } + + const int CANT_OPEN_FILE = 13300; + + BSONObj cat(const BSONObj& args){ + BSONElement e = oneArg(args); + stringstream ss; + ifstream f(e.valuestrsafe()); + uassert(CANT_OPEN_FILE, "couldn't open file", f.is_open() ); + + streamsize sz = 0; + while( 1 ) { + char ch = 0; + // slow...maybe change one day + f.get(ch); + if( ch == 0 ) break; + ss << ch; + sz += 1; + uassert(13301, "cat() : file to big to load as a variable", sz < 1024 * 1024 * 16); + } + return BSON( "" << ss.str() ); + } + + BSONObj md5sumFile(const BSONObj& args){ + BSONElement e = oneArg(args); + stringstream ss; + FILE* f = fopen(e.valuestrsafe(), "rb"); + uassert(CANT_OPEN_FILE, "couldn't open file", f ); + + md5digest d; + md5_state_t st; + md5_init(&st); + + enum {BUFLEN = 4*1024}; + char buffer[BUFLEN]; + int bytes_read; + while( (bytes_read = fread(buffer, 1, BUFLEN, f)) ) { + md5_append( &st , (const md5_byte_t*)(buffer) , bytes_read ); + } + + md5_finish(&st, d); + return BSON( "" << digestToString( d ) ); + } + + BSONObj mkdir(const BSONObj& args){ + boost::filesystem::create_directories(args.firstElement().String()); + return BSON( "" << true ); + } + + BSONObj removeFile(const BSONObj& args){ + BSONElement e = oneArg(args); + bool found = false; + + path root( args.firstElement().valuestrsafe() ); + if ( boost::filesystem::exists( root ) ){ + found = true; + boost::filesystem::remove_all( root ); + } + + BSONObjBuilder b; + b.appendBool( "removed" , found ); + return b.obj(); + } + map< int, pair< pid_t, int > > dbs; + map< pid_t, int > shells; +#ifdef _WIN32 + map< pid_t, HANDLE > handles; +#endif + + mongo::mutex mongoProgramOutputMutex("mongoProgramOutputMutex"); + stringstream mongoProgramOutput_; + + void goingAwaySoon() { + mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); + mongo::goingAway = true; + } + + void writeMongoProgramOutputLine( int port, int pid, const char *line ) { + mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); + if( mongo::goingAway ) throw "program is terminating"; + stringstream buf; + if ( port > 0 ) + buf << "m" << port << "| " << line; + else + buf << "sh" << pid << "| " << line; + cout << buf.str() << endl; + 
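The file utilities above are likewise exposed to scripts; cat refuses files over 16 MB, and md5sumFile streams the file through the bundled md5 in 4 KB blocks. Typical use, with placeholder file names:

    print(hostname());                   // this machine's name
    var text = cat("notes.txt");         // whole file as one string (< 16 MB)
    print(md5sumFile("dump.bson"));      // hex digest of the file's bytes
    mkdir("backup");                     // create_directories: makes parents too
    printjson(removeFile("old.js"));     // { removed: true } if it existed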
mongoProgramOutput_ << buf.str() << endl; + } + + // only returns last 100000 characters + BSONObj RawMongoProgramOutput( const BSONObj &args ) { + mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); + string out = mongoProgramOutput_.str(); + size_t len = out.length(); + if ( len > 100000 ) + out = out.substr( len - 100000, 100000 ); + return BSON( "" << out ); + } + + BSONObj ClearRawMongoProgramOutput( const BSONObj &args ) { + mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); + mongoProgramOutput_.str( "" ); + return undefined_; + } + + class ProgramRunner { + vector argv_; + int port_; + int pipe_; + pid_t pid_; + public: + pid_t pid() const { return pid_; } + int port() const { return port_; } + + boost::filesystem::path find(string prog) { + boost::filesystem::path p = prog; +#ifdef _WIN32 + p = change_extension(p, ".exe"); +#endif + + if( boost::filesystem::exists(p) ){ +#ifndef _WIN32 + p = boost::filesystem::initial_path() / p; +#endif + return p; + } + + { + boost::filesystem::path t = boost::filesystem::current_path() / p; + if( boost::filesystem::exists(t) ) return t; + } + try { + if( theScope->type("_path") == String ) { + string path = theScope->getString("_path"); + if( !path.empty() ) { + boost::filesystem::path t = boost::filesystem::path(path) / p; + if( boost::filesystem::exists(t) ) return t; + } + } + } catch(...) { } + { + boost::filesystem::path t = boost::filesystem::initial_path() / p; + if( boost::filesystem::exists(t) ) return t; + } + return p; // not found; might find via system path + } + + ProgramRunner( const BSONObj &args , bool isMongoProgram=true) + { + assert( !args.isEmpty() ); + + string program( args.firstElement().valuestrsafe() ); + assert( !program.empty() ); + boost::filesystem::path programPath = find(program); + + if (isMongoProgram){ +#if 0 + if (program == "mongos") { + argv_.push_back("valgrind"); + argv_.push_back("--log-file=/tmp/mongos-%p.valgrind"); + argv_.push_back("--leak-check=yes"); + argv_.push_back("--suppressions=valgrind.suppressions"); + //argv_.push_back("--error-exitcode=1"); + argv_.push_back("--"); + } +#endif + } + + argv_.push_back( programPath.native_file_string() ); + + port_ = -1; + + BSONObjIterator j( args ); + j.next(); // skip program name (handled above) + while(j.more()) { + BSONElement e = j.next(); + string str; + if ( e.isNumber() ) { + stringstream ss; + ss << e.number(); + str = ss.str(); + } else { + assert( e.type() == mongo::String ); + str = e.valuestr(); + } + if ( str == "--port" ) + port_ = -2; + else if ( port_ == -2 ) + port_ = strtol( str.c_str(), 0, 10 ); + argv_.push_back(str); + } + + if ( program != "mongod" && program != "mongos" && program != "mongobridge" ) + port_ = 0; + else { + if ( port_ <= 0 ) + cout << "error: a port number is expected when running mongod (etc.) 
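Output from spawned programs is serialized through a mutex and tagged "m<port>| " for servers or "sh<pid>| " for anything else; RawMongoProgramOutput keeps only the last 100,000 characters. Assuming the conventional shell bindings for these natives:

    clearRawMongoProgramOutput();        // reset the capture buffer
    runProgram("echo", "hello");         // appears in the shell as "sh<pid>| hello"
    print(rawMongoProgramOutput());      // everything captured since the clear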
from the shell" << endl; + assert( port_ > 0 ); + } + if ( port_ > 0 && dbs.count( port_ ) != 0 ){ + cerr << "count for port: " << port_ << " is not 0 is: " << dbs.count( port_ ) << endl; + assert( dbs.count( port_ ) == 0 ); + } + } + + void start() { + int pipeEnds[ 2 ]; + assert( pipe( pipeEnds ) != -1 ); + + fflush( 0 ); + launch_process(pipeEnds[1]); //sets pid_ + + { + stringstream ss; + ss << "shell: started program"; + for (unsigned i=0; i < argv_.size(); i++) + ss << " " << argv_[i]; + ss << '\n'; + cout << ss.str(); cout.flush(); + } + + if ( port_ > 0 ) + dbs.insert( make_pair( port_, make_pair( pid_, pipeEnds[ 1 ] ) ) ); + else + shells.insert( make_pair( pid_, pipeEnds[ 1 ] ) ); + pipe_ = pipeEnds[ 0 ]; + } + + // Continue reading output + void operator()() { + try { + // This assumes there aren't any 0's in the mongo program output. + // Hope that's ok. + const unsigned bufSize = 64000; + char buf[ bufSize ]; + char temp[ bufSize ]; + char *start = buf; + while( 1 ) { + int lenToRead = ( bufSize - 1 ) - ( start - buf ); + assert( lenToRead > 0 ); + int ret = read( pipe_, (void *)start, lenToRead ); + if( mongo::goingAway ) + break; + assert( ret != -1 ); + start[ ret ] = '\0'; + if ( strlen( start ) != unsigned( ret ) ) + writeMongoProgramOutputLine( port_, pid_, "WARNING: mongod wrote null bytes to output" ); + char *last = buf; + for( char *i = strchr( buf, '\n' ); i; last = i + 1, i = strchr( last, '\n' ) ) { + *i = '\0'; + writeMongoProgramOutputLine( port_, pid_, last ); + } + if ( ret == 0 ) { + if ( *last ) + writeMongoProgramOutputLine( port_, pid_, last ); + close( pipe_ ); + break; + } + if ( last != buf ) { + strcpy( temp, last ); + strcpy( buf, temp ); + } else { + assert( strlen( buf ) < bufSize ); + } + start = buf + strlen( buf ); + } + } catch(...) 
{ + } + } + void launch_process(int child_stdout){ +#ifdef _WIN32 + stringstream ss; + for( unsigned i=0; i < argv_.size(); i++ ){ + if (i) ss << ' '; + if (argv_[i].find(' ') == string::npos) + ss << argv_[i]; + else + ss << '"' << argv_[i] << '"'; + } + + string args = ss.str(); + + boost::scoped_array args_tchar (new TCHAR[args.size() + 1]); + size_t i; + for(i=0; i < args.size(); i++) + args_tchar[i] = args[i]; + args_tchar[i] = 0; + + HANDLE h = (HANDLE)_get_osfhandle(child_stdout); + assert(h != INVALID_HANDLE_VALUE); + assert(SetHandleInformation(h, HANDLE_FLAG_INHERIT, 1)); + + STARTUPINFO si; + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + si.hStdError = h; + si.hStdOutput = h; + si.dwFlags |= STARTF_USESTDHANDLES; + + PROCESS_INFORMATION pi; + ZeroMemory(&pi, sizeof(pi)); + + bool success = CreateProcess( NULL, args_tchar.get(), NULL, NULL, true, 0, NULL, NULL, &si, &pi) != 0; + { + stringstream ss; + ss << "couldn't start process " << argv_[0]; + uassert(13294, ss.str(), success); + } + + CloseHandle(pi.hThread); + + pid_ = pi.dwProcessId; + handles.insert( make_pair( pid_, pi.hProcess ) ); + +#else + + pid_ = fork(); + assert( pid_ != -1 ); + + if ( pid_ == 0 ) { + // DON'T ASSERT IN THIS BLOCK - very bad things will happen + + const char** argv = new const char* [argv_.size()+1]; // don't need to free - in child + for (unsigned i=0; i < argv_.size(); i++){ + argv[i] = argv_[i].c_str(); + } + argv[argv_.size()] = 0; + + if ( dup2( child_stdout, STDOUT_FILENO ) == -1 || + dup2( child_stdout, STDERR_FILENO ) == -1 ) + { + cout << "Unable to dup2 child output: " << errnoWithDescription() << endl; + ::_Exit(-1); //do not pass go, do not call atexit handlers + } + + execvp( argv[ 0 ], const_cast(argv) ); + + cout << "Unable to start program " << argv[0] << ' ' << errnoWithDescription() << endl; + ::_Exit(-1); + } + +#endif + } + }; + + //returns true if process exited + bool wait_for_pid(pid_t pid, bool block=true, int* exit_code=NULL){ +#ifdef _WIN32 + assert(handles.count(pid)); + HANDLE h = handles[pid]; + + if (block) + WaitForSingleObject(h, INFINITE); + + DWORD tmp; + if(GetExitCodeProcess(h, &tmp)){ + CloseHandle(h); + handles.erase(pid); + if (exit_code) + *exit_code = tmp; + return true; + }else{ + return false; + } +#else + int tmp; + bool ret = (pid == waitpid(pid, &tmp, (block ? 
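launch_process dup2s the pipe's write end onto the child's stdout and stderr, and the reader thread above splits the stream on newlines before handing each line to writeMongoProgramOutputLine. From a test script, the whole machinery is driven by the servers.js helper shown earlier, with placeholder arguments:

    var conn = startMongoProgram("mongod", "--port", 27500,
                                 "--dbpath", "/data/db/extra", "--nohttpinterface");
    // every line mongod prints shows up prefixed "m27500| "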
0 : WNOHANG))); + if (exit_code) + *exit_code = WEXITSTATUS(tmp); + return ret; + +#endif + } + + BSONObj WaitProgram( const BSONObj& a ){ + int pid = a.firstElement().numberInt(); + BSONObj x = BSON( "" << wait_for_pid( pid ) ); + shells.erase( pid ); + return x; + } + + BSONObj StartMongoProgram( const BSONObj &a ) { + _nokillop = true; + ProgramRunner r( a ); + r.start(); + boost::thread t( r ); + return BSON( string( "" ) << int( r.pid() ) ); + } + + BSONObj RunMongoProgram( const BSONObj &a ) { + ProgramRunner r( a ); + r.start(); + boost::thread t( r ); + int exit_code; + wait_for_pid( r.pid(), true, &exit_code ); + if ( r.port() > 0 ) { + dbs.erase( r.port() ); + } else { + shells.erase( r.pid() ); + } + return BSON( string( "" ) << exit_code ); + } + + BSONObj RunProgram(const BSONObj &a) { + ProgramRunner r( a, false ); + r.start(); + boost::thread t( r ); + int exit_code; + wait_for_pid(r.pid(), true, &exit_code); + shells.erase( r.pid() ); + return BSON( string( "" ) << exit_code ); + } + + BSONObj ResetDbpath( const BSONObj &a ) { + assert( a.nFields() == 1 ); + string path = a.firstElement().valuestrsafe(); + assert( !path.empty() ); + if ( boost::filesystem::exists( path ) ) + boost::filesystem::remove_all( path ); + boost::filesystem::create_directory( path ); + return undefined_; + } + + void copyDir( const path &from, const path &to ) { + directory_iterator end; + directory_iterator i( from ); + while( i != end ) { + path p = *i; + if ( p.leaf() != "mongod.lock" ) { + if ( is_directory( p ) ) { + path newDir = to / p.leaf(); + boost::filesystem::create_directory( newDir ); + copyDir( p, newDir ); + } else { + boost::filesystem::copy_file( p, to / p.leaf() ); + } + } + ++i; + } + } + + // NOTE target dbpath will be cleared first + BSONObj CopyDbpath( const BSONObj &a ) { + assert( a.nFields() == 2 ); + BSONObjIterator i( a ); + string from = i.next().str(); + string to = i.next().str(); + assert( !from.empty() ); + assert( !to.empty() ); + if ( boost::filesystem::exists( to ) ) + boost::filesystem::remove_all( to ); + boost::filesystem::create_directory( to ); + copyDir( from, to ); + return undefined_; + } + + inline void kill_wrapper(pid_t pid, int sig, int port){ +#ifdef _WIN32 + if (sig == SIGKILL || port == 0){ + assert( handles.count(pid) ); + TerminateProcess(handles[pid], 1); // returns failure for "zombie" processes. + }else{ + DBClientConnection conn; + conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port)); + try { + conn.simpleCommand("admin", NULL, "shutdown"); + } catch (...) { + //Do nothing. This command never returns data to the client and the driver doesn't like that. 
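killDb above escalates politely: it delivers the requested signal (on Windows, a shutdown command over the wire unless SIGKILL), polls once a second for up to 65 seconds, and re-sends as SIGKILL after five. The closing srand/rand pair seeds rand_r on POSIX, so sequences are reproducible there, while on Windows the seed is currently ignored. Assuming the conventional bindings for these natives:

    stopMongoProgram(27500);             // SIGTERM, escalating to SIGKILL if ignored
    stopMongoProgram(27500, 9);          // optional second argument picks the signal
    srand(12345);
    print(rand());                       // reproducible on POSIX (rand_r); not on Windows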
+ } + } +#else + int x = kill( pid, sig ); + if ( x ){ + if ( errno == ESRCH ){ + } + else { + cout << "killFailed: " << errnoWithDescription() << endl; + assert( x == 0 ); + } + } + +#endif + } + + int killDb( int port, pid_t _pid, int signal ) { + pid_t pid; + int exitCode = 0; + if ( port > 0 ) { + if( dbs.count( port ) != 1 ) { + cout << "No db started on port: " << port << endl; + return 0; + } + pid = dbs[ port ].first; + } else { + pid = _pid; + } + + kill_wrapper( pid, signal, port ); + + int i = 0; + for( ; i < 65; ++i ) { + if ( i == 5 ) { + char now[64]; + time_t_to_String(time(0), now); + now[ 20 ] = 0; + cout << now << " process on port " << port << ", with pid " << pid << " not terminated, sending sigkill" << endl; + kill_wrapper( pid, SIGKILL, port ); + } + if(wait_for_pid(pid, false, &exitCode)) + break; + sleepmillis( 1000 ); + } + if ( i == 65 ) { + char now[64]; + time_t_to_String(time(0), now); + now[ 20 ] = 0; + cout << now << " failed to terminate process on port " << port << ", with pid " << pid << endl; + assert( "Failed to terminate process" == 0 ); + } + + if ( port > 0 ) { + close( dbs[ port ].second ); + dbs.erase( port ); + } else { + close( shells[ pid ] ); + shells.erase( pid ); + } + if ( i > 4 || signal == SIGKILL ) { + sleepmillis( 4000 ); // allow operating system to reclaim resources + } + + return exitCode; + } + + int getSignal( const BSONObj &a ) { + int ret = SIGTERM; + if ( a.nFields() == 2 ) { + BSONObjIterator i( a ); + i.next(); + BSONElement e = i.next(); + assert( e.isNumber() ); + ret = int( e.number() ); + } + return ret; + } + + BSONObj StopMongoProgram( const BSONObj &a ) { + assert( a.nFields() == 1 || a.nFields() == 2 ); + assert( a.firstElement().isNumber() ); + int port = int( a.firstElement().number() ); + int code = killDb( port, 0, getSignal( a ) ); + cout << "shell: stopped mongo program on port " << port << endl; + return BSON( "" << code ); + } + + BSONObj StopMongoProgramByPid( const BSONObj &a ) { + assert( a.nFields() == 1 || a.nFields() == 2 ); + assert( a.firstElement().isNumber() ); + int pid = int( a.firstElement().number() ); + int code = killDb( 0, pid, getSignal( a ) ); + cout << "shell: stopped mongo program on pid " << pid << endl; + return BSON( "" << code ); + } + + void KillMongoProgramInstances() { + vector< int > ports; + for( map< int, pair< pid_t, int > >::iterator i = dbs.begin(); i != dbs.end(); ++i ) + ports.push_back( i->first ); + for( vector< int >::iterator i = ports.begin(); i != ports.end(); ++i ) + killDb( *i, 0, SIGTERM ); + vector< pid_t > pids; + for( map< pid_t, int >::iterator i = shells.begin(); i != shells.end(); ++i ) + pids.push_back( i->first ); + for( vector< pid_t >::iterator i = pids.begin(); i != pids.end(); ++i ) + killDb( 0, *i, SIGTERM ); + } +#else // ndef MONGO_SAFE_SHELL + void KillMongoProgramInstances() {} +#endif + + MongoProgramScope::~MongoProgramScope() { + DESTRUCTOR_GUARD( + KillMongoProgramInstances(); + ClearRawMongoProgramOutput( BSONObj() ); + ) + } + + unsigned _randomSeed; + + BSONObj JSSrand( const BSONObj &a ) { + uassert( 12518, "srand requires a single numeric argument", + a.nFields() == 1 && a.firstElement().isNumber() ); + _randomSeed = (unsigned)a.firstElement().numberLong(); // grab least significant digits + return undefined_; + } + + BSONObj JSRand( const BSONObj &a ) { + uassert( 12519, "rand accepts no arguments", a.nFields() == 0 ); + unsigned r; +#if !defined(_WIN32) + r = rand_r( &_randomSeed ); +#else + r = rand(); // seed not used in this case +#endif 
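The return that follows scales the raw integer into [0,1); dividing by RAND_MAX + 1 rather than RAND_MAX keeps the result strictly below one even when r == RAND_MAX. A compilable restatement of only that arithmetic (seed value illustrative):

    #include <cstdlib>
    #include <iostream>

    int main() {
        unsigned seed = 12345;              // what _srand would have stored
    #if !defined(_WIN32)
        unsigned r = rand_r(&seed);         // reentrant: state lives in the caller
    #else
        unsigned r = rand();                // process-global state on Windows
    #endif
        // +1 keeps the quotient in [0,1) even when r == RAND_MAX
        double d = double(r) / (double(RAND_MAX) + 1);
        std::cout << d << "\n";
        return 0;
    }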
+ return BSON( "" << double( r ) / ( double( RAND_MAX ) + 1 ) ); + } + + BSONObj isWindows(const BSONObj& a){ + uassert( 13006, "isWindows accepts no arguments", a.nFields() == 0 ); +#ifdef _WIN32 + return BSON( "" << true ); +#else + return BSON( "" << false ); +#endif + } + + BSONObj getHostName(const BSONObj& a){ + uassert( 13411, "getHostName accepts no arguments", a.nFields() == 0 ); + char buf[260]; // HOST_NAME_MAX is usually 255 + assert(gethostname(buf, 260) == 0); + buf[259] = '\0'; + return BSON("" << buf); + + } + + void installShellUtils( Scope& scope ){ + theScope = &scope; + scope.injectNative( "sleep" , JSSleep ); + scope.injectNative( "quit", Quit ); + scope.injectNative( "getMemInfo" , JSGetMemInfo ); + scope.injectNative( "_srand" , JSSrand ); + scope.injectNative( "_rand" , JSRand ); + scope.injectNative( "_isWindows" , isWindows ); + +#ifndef MONGO_SAFE_SHELL + //can't launch programs + scope.injectNative( "_startMongoProgram", StartMongoProgram ); + scope.injectNative( "runProgram", RunProgram ); + scope.injectNative( "run", RunProgram ); + scope.injectNative( "runMongoProgram", RunMongoProgram ); + scope.injectNative( "stopMongod", StopMongoProgram ); + scope.injectNative( "stopMongoProgram", StopMongoProgram ); + scope.injectNative( "stopMongoProgramByPid", StopMongoProgramByPid ); + scope.injectNative( "rawMongoProgramOutput", RawMongoProgramOutput ); + scope.injectNative( "clearRawMongoProgramOutput", ClearRawMongoProgramOutput ); + scope.injectNative( "waitProgram" , WaitProgram ); + + scope.injectNative( "getHostName" , getHostName ); + scope.injectNative( "removeFile" , removeFile ); + scope.injectNative( "listFiles" , listFiles ); + scope.injectNative( "ls" , ls ); + scope.injectNative( "pwd", pwd ); + scope.injectNative( "cd", cd ); + scope.injectNative( "cat", cat ); + scope.injectNative( "hostname", hostname); + scope.injectNative( "resetDbpath", ResetDbpath ); + scope.injectNative( "copyDbpath", CopyDbpath ); + scope.injectNative( "md5sumFile", md5sumFile ); + scope.injectNative( "mkdir" , mkdir ); +#endif + } + + void initScope( Scope &scope ) { + scope.externalSetup(); + mongo::shellUtils::installShellUtils( scope ); + scope.execSetup( jsconcatcode_server , "setupServerCode" ); + + if ( !_dbConnect.empty() ) { + uassert( 12513, "connect failed", scope.exec( _dbConnect , "(connect)" , false , true , false ) ); + if ( !_dbAuth.empty() ) { + installGlobalUtils( scope ); + uassert( 12514, "login failed", scope.exec( _dbAuth , "(auth)" , true , true , false ) ); + } + } + } + + map< const void*, string > _allMyUris; + bool _nokillop = false; + void onConnect( DBClientWithCommands &c ) { + if ( _nokillop ) { + return; + } + BSONObj info; + if ( c.runCommand( "admin", BSON( "whatsmyuri" << 1 ), info ) ) { + // There's no way to explicitly disconnect a DBClientConnection, but we might allocate + // a new uri on automatic reconnect. So just store one uri per connection. + _allMyUris[ &c ] = info[ "you" ].str(); + } + } + } +} diff -Nru mongodb-1.4.4/shell/utils.cpp mongodb-1.6.3/shell/utils.cpp --- mongodb-1.4.4/shell/utils.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/utils.cpp 1969-12-31 16:00:00.000000000 -0800 @@ -1,713 +0,0 @@ -// utils.cpp -/* - * Copyright 2010 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "../stdafx.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef _WIN32 -# include -# define SIGKILL 9 -#else -# include -# include -# include -# include -# include -#endif - -#include "../client/dbclient.h" -#include "../util/processinfo.h" -#include "utils.h" - -extern const char * jsconcatcode_server; - -namespace mongo { -#ifdef _WIN32 - inline int close(int fd) { return _close(fd); } - inline int read(int fd, void* buf, size_t size) { return _read(fd, buf, size); } - - inline int pipe(int fds[2]) { return _pipe(fds, 1024, _O_TEXT | _O_NOINHERIT); } -#endif - - namespace shellUtils { - - std::string _dbConnect; - std::string _dbAuth; - - const char *argv0 = 0; - void RecordMyLocation( const char *_argv0 ) { argv0 = _argv0; } - - // helpers - - BSONObj makeUndefined() { - BSONObjBuilder b; - b.appendUndefined( "" ); - return b.obj(); - } - const BSONObj undefined_ = makeUndefined(); - - BSONObj encapsulate( const BSONObj &obj ) { - return BSON( "" << obj ); - } - - void sleepms( int ms ) { - boost::xtime xt; - boost::xtime_get(&xt, boost::TIME_UTC); - xt.sec += ( ms / 1000 ); - xt.nsec += ( ms % 1000 ) * 1000000; - if ( xt.nsec >= 1000000000 ) { - xt.nsec -= 1000000000; - xt.sec++; - } - boost::thread::sleep(xt); - } - - // real methods - - - - mongo::BSONObj JSSleep(const mongo::BSONObj &args){ - assert( args.nFields() == 1 ); - assert( args.firstElement().isNumber() ); - int ms = int( args.firstElement().number() ); - { - auto_ptr< ScriptEngine::Unlocker > u = globalScriptEngine->newThreadUnlocker(); - sleepms( ms ); - } - return undefined_; - } - - - BSONObj Quit(const BSONObj& args) { - // If not arguments are given first element will be EOO, which - // converts to the integer value 0. - int exit_code = int( args.firstElement().number() ); - ::exit(exit_code); - return undefined_; - } - - BSONObj JSGetMemInfo( const BSONObj& args ){ - ProcessInfo pi; - uassert( 10258 , "processinfo not supported" , pi.supported() ); - - BSONObjBuilder e; - e.append( "virtual" , pi.getVirtualMemorySize() ); - e.append( "resident" , pi.getResidentSize() ); - - BSONObjBuilder b; - b.append( "ret" , e.obj() ); - - return b.obj(); - } - - -#ifndef MONGO_SAFE_SHELL - - BSONObj listFiles(const BSONObj& args){ - uassert( 10257 , "need to specify 1 argument to listFiles" , args.nFields() == 1 ); - - BSONObjBuilder lst; - - string rootname = args.firstElement().valuestrsafe(); - path root( rootname ); - stringstream ss; - ss << "listFiles: no such directory: " << rootname; - string msg = ss.str(); - uassert( 12581, msg.c_str(), boost::filesystem::exists( root ) ); - - directory_iterator end; - directory_iterator i( root); - - int num =0; - while ( i != end ){ - path p = *i; - BSONObjBuilder b; - b << "name" << p.string(); - b.appendBool( "isDirectory", is_directory( p ) ); - if ( ! is_directory( p ) ){ - try { - b.append( "size" , (double)file_size( p ) ); - } - catch ( ... 
){ - i++; - continue; - } - } - - stringstream ss; - ss << num; - string name = ss.str(); - lst.append( name.c_str(), b.done() ); - num++; - i++; - } - - BSONObjBuilder ret; - ret.appendArray( "", lst.done() ); - return ret.obj(); - } - - - BSONObj removeFile(const BSONObj& args){ - uassert( 12597 , "need to specify 1 argument to listFiles" , args.nFields() == 1 ); - - bool found = false; - - path root( args.firstElement().valuestrsafe() ); - if ( boost::filesystem::exists( root ) ){ - found = true; - boost::filesystem::remove_all( root ); - } - - BSONObjBuilder b; - b.appendBool( "removed" , found ); - return b.obj(); - } - map< int, pair< pid_t, int > > dbs; - map< pid_t, int > shells; -#ifdef _WIN32 - map< pid_t, HANDLE > handles; -#endif - - mongo::mutex mongoProgramOutputMutex; - stringstream mongoProgramOutput_; - - void writeMongoProgramOutputLine( int port, int pid, const char *line ) { - mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); - stringstream buf; - if ( port > 0 ) - buf << "m" << port << "| " << line; - else - buf << "sh" << pid << "| " << line; - cout << buf.str() << endl; - mongoProgramOutput_ << buf.str() << endl; - } - - // only returns last 100000 characters - BSONObj RawMongoProgramOutput( const BSONObj &args ) { - mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); - string out = mongoProgramOutput_.str(); - size_t len = out.length(); - if ( len > 100000 ) - out = out.substr( len - 100000, 100000 ); - return BSON( "" << out ); - } - - BSONObj ClearRawMongoProgramOutput( const BSONObj &args ) { - mongo::mutex::scoped_lock lk( mongoProgramOutputMutex ); - mongoProgramOutput_.str( "" ); - return undefined_; - } - - class ProgramRunner { - vector argv_; - int port_; - int pipe_; - pid_t pid_; - public: - pid_t pid() const { return pid_; } - ProgramRunner( const BSONObj &args , bool isMongoProgram=true) - { - assert( !args.isEmpty() ); - - string program( args.firstElement().valuestrsafe() ); - assert( !program.empty() ); - boost::filesystem::path programPath = program; - - if (isMongoProgram){ - programPath = boost::filesystem::initial_path() / programPath; -#ifdef _WIN32 - programPath = change_extension(programPath, ".exe"); -#endif - massert( 10435 , "couldn't find " + programPath.native_file_string(), boost::filesystem::exists( programPath ) ); - } - - argv_.push_back( programPath.native_file_string() ); - - port_ = -1; - - BSONObjIterator j( args ); - j.next(); // skip program name (handled above) - while(j.more()) { - BSONElement e = j.next(); - string str; - if ( e.isNumber() ) { - stringstream ss; - ss << e.number(); - str = ss.str(); - } else { - assert( e.type() == mongo::String ); - str = e.valuestr(); - } - if ( str == "--port" ) - port_ = -2; - else if ( port_ == -2 ) - port_ = strtol( str.c_str(), 0, 10 ); - argv_.push_back(str); - } - - if ( program != "mongod" && program != "mongos" && program != "mongobridge" ) - port_ = 0; - else - assert( port_ > 0 ); - if ( port_ > 0 && dbs.count( port_ ) != 0 ){ - cerr << "count for port: " << port_ << " is not 0 is: " << dbs.count( port_ ) << endl; - assert( dbs.count( port_ ) == 0 ); - } - } - - void start() { - int pipeEnds[ 2 ]; - assert( pipe( pipeEnds ) != -1 ); - - fflush( 0 ); - launch_process(pipeEnds[1]); //sets pid_ - - cout << "shell: started mongo program"; - for (unsigned i=0; i < argv_.size(); i++) - cout << " " << argv_[i]; - cout << endl; - - if ( port_ > 0 ) - dbs.insert( make_pair( port_, make_pair( pid_, pipeEnds[ 1 ] ) ) ); - else - shells.insert( make_pair( pid_, pipeEnds[ 1 ] ) ); 
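Both this 1.4.4 ProgramRunner and its 1.6.3 replacement locate the server's port by scanning argv with a small sentinel trick in the constructor above: seeing "--port" sets port_ to -2, and the very next token is then parsed as the number. A self-contained sketch of that scan (findPort is a hypothetical name, not shell code):

    #include <cstdlib>
    #include <iostream>
    #include <string>
    #include <vector>

    int findPort(const std::vector<std::string>& args) {
        int port = -1;                          // -1: no --port seen
        for (size_t i = 0; i < args.size(); i++) {
            if (args[i] == "--port")
                port = -2;                      // sentinel: next token is the port
            else if (port == -2)
                port = std::strtol(args[i].c_str(), 0, 10);
        }
        return port;
    }

    int main() {
        std::vector<std::string> args;
        args.push_back("mongod");
        args.push_back("--port");
        args.push_back("31000");
        std::cout << findPort(args) << "\n";    // prints 31000
        return 0;
    }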
- pipe_ = pipeEnds[ 0 ]; - } - - // Continue reading output - void operator()() { - // This assumes there aren't any 0's in the mongo program output. - // Hope that's ok. - char buf[ 1024 ]; - char temp[ 1024 ]; - char *start = buf; - while( 1 ) { - int lenToRead = 1023 - ( start - buf ); - int ret = read( pipe_, (void *)start, lenToRead ); - assert( ret != -1 ); - start[ ret ] = '\0'; - if ( strlen( start ) != unsigned( ret ) ) - writeMongoProgramOutputLine( port_, pid_, "WARNING: mongod wrote null bytes to output" ); - char *last = buf; - for( char *i = strchr( buf, '\n' ); i; last = i + 1, i = strchr( last, '\n' ) ) { - *i = '\0'; - writeMongoProgramOutputLine( port_, pid_, last ); - } - if ( ret == 0 ) { - if ( *last ) - writeMongoProgramOutputLine( port_, pid_, last ); - close( pipe_ ); - break; - } - if ( last != buf ) { - strcpy( temp, last ); - strcpy( buf, temp ); - } else { - assert( strlen( buf ) <= 1023 ); - } - start = buf + strlen( buf ); - } - } - void launch_process(int child_stdout){ -#ifdef _WIN32 - stringstream ss; - for (int i=0; i < argv_.size(); i++){ - if (i) ss << ' '; - if (argv_[i].find(' ') == string::npos) - ss << argv_[i]; - else - ss << '"' << argv_[i] << '"'; - } - - string args = ss.str(); - - boost::scoped_array args_tchar (new TCHAR[args.size() + 1]); - for (size_t i=0; i < args.size()+1; i++) - args_tchar[i] = args[i]; - - HANDLE h = (HANDLE)_get_osfhandle(child_stdout); - assert(h != INVALID_HANDLE_VALUE); - assert(SetHandleInformation(h, HANDLE_FLAG_INHERIT, 1)); - - STARTUPINFO si; - ZeroMemory(&si, sizeof(si)); - si.cb = sizeof(si); - si.hStdError = h; - si.hStdOutput = h; - si.dwFlags |= STARTF_USESTDHANDLES; - - PROCESS_INFORMATION pi; - ZeroMemory(&pi, sizeof(pi)); - - bool success = CreateProcess( NULL, args_tchar.get(), NULL, NULL, true, 0, NULL, NULL, &si, &pi); - assert(success); - - CloseHandle(pi.hThread); - - pid_ = pi.dwProcessId; - handles.insert( make_pair( pid_, pi.hProcess ) ); - -#else - - pid_ = fork(); - assert( pid_ != -1 ); - - if ( pid_ == 0 ) { - // DON'T ASSERT IN THIS BLOCK - very bad things will happen - - const char** argv = new const char* [argv_.size()+1]; // don't need to free - in child - for (unsigned i=0; i < argv_.size(); i++){ - argv[i] = argv_[i].c_str(); - } - argv[argv_.size()] = 0; - - if ( dup2( child_stdout, STDOUT_FILENO ) == -1 || - dup2( child_stdout, STDERR_FILENO ) == -1 ) - { - cout << "Unable to dup2 child output: " << OUTPUT_ERRNO << endl; - ::_Exit(-1); //do not pass go, do not call atexit handlers - } - - execvp( argv[ 0 ], const_cast(argv) ); - - cout << "Unable to start program: " << OUTPUT_ERRNO << endl; - ::_Exit(-1); - } - -#endif - } - }; - - //returns true if process exited - bool wait_for_pid(pid_t pid, bool block=true, int* exit_code=NULL){ -#ifdef _WIN32 - assert(handles.count(pid)); - HANDLE h = handles[pid]; - - if (block) - WaitForSingleObject(h, INFINITE); - - DWORD tmp; - if(GetExitCodeProcess(h, &tmp)){ - CloseHandle(h); - handles.erase(pid); - if (exit_code) - *exit_code = tmp; - return true; - }else{ - return false; - } -#else - int tmp; - bool ret = (pid == waitpid(pid, &tmp, (block ? 
0 : WNOHANG))); - if (exit_code) - *exit_code = WEXITSTATUS(tmp); - return ret; - -#endif - } - BSONObj StartMongoProgram( const BSONObj &a ) { - _nokillop = true; - ProgramRunner r( a ); - r.start(); - boost::thread t( r ); - return BSON( string( "" ) << int( r.pid() ) ); - } - - BSONObj RunMongoProgram( const BSONObj &a ) { - ProgramRunner r( a ); - r.start(); - boost::thread t( r ); - wait_for_pid(r.pid()); - shells.erase( r.pid() ); - return BSON( string( "" ) << int( r.pid() ) ); - } - - BSONObj RunProgram(const BSONObj &a) { - ProgramRunner r( a, false ); - r.start(); - boost::thread t( r ); - int exit_code; - wait_for_pid(r.pid(), true, &exit_code); - shells.erase( r.pid() ); - return BSON( string( "" ) << exit_code ); - } - - BSONObj ResetDbpath( const BSONObj &a ) { - assert( a.nFields() == 1 ); - string path = a.firstElement().valuestrsafe(); - assert( !path.empty() ); - if ( boost::filesystem::exists( path ) ) - boost::filesystem::remove_all( path ); - boost::filesystem::create_directory( path ); - return undefined_; - } - - void copyDir( const path &from, const path &to ) { - directory_iterator end; - directory_iterator i( from ); - while( i != end ) { - path p = *i; - if ( p.leaf() != "mongod.lock" ) { - if ( is_directory( p ) ) { - path newDir = to / p.leaf(); - boost::filesystem::create_directory( newDir ); - copyDir( p, newDir ); - } else { - boost::filesystem::copy_file( p, to / p.leaf() ); - } - } - ++i; - } - } - - // NOTE target dbpath will be cleared first - BSONObj CopyDbpath( const BSONObj &a ) { - assert( a.nFields() == 2 ); - BSONObjIterator i( a ); - string from = i.next().str(); - string to = i.next().str(); - assert( !from.empty() ); - assert( !to.empty() ); - if ( boost::filesystem::exists( to ) ) - boost::filesystem::remove_all( to ); - boost::filesystem::create_directory( to ); - copyDir( from, to ); - return undefined_; - } - - inline void kill_wrapper(pid_t pid, int sig, int port){ -#ifdef _WIN32 - if (sig == SIGKILL || port == 0){ - assert( handles.count(pid) ); - TerminateProcess(handles[pid], 1); // returns failure for "zombie" processes. - }else{ - DBClientConnection conn; - conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port)); - try { - conn.simpleCommand("admin", NULL, "shutdown"); - } catch (...) { - //Do nothing. This command never returns data to the client and the driver doesn't like that. 
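Contrast with the 1.6.3 wrapper earlier in this patch: the 1.4.4 line just below asserts that kill() succeeded outright, while the new code first checks for ESRCH, since "no such process" is routine when a test races its own shutdown. A sketch of the tolerant variant (signalIfAlive is a hypothetical name):

    #include <sys/types.h>
    #include <signal.h>
    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    bool signalIfAlive(pid_t pid, int sig) {
        if (kill(pid, sig) == 0)
            return true;                 // delivered (or, with sig 0, alive)
        if (errno == ESRCH)
            return false;                // already gone: not an error here
        perror("kill");                  // anything else is a real failure
        return false;
    }

    int main() {
        // signal 0 probes for existence without delivering anything
        return signalIfAlive(getpid(), 0) ? 0 : 1;
    }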
- } - } -#else - assert( 0 == kill( pid, sig ) ); -#endif - } - - - int killDb( int port, pid_t _pid, int signal ) { - pid_t pid; - int exitCode = 0; - if ( port > 0 ) { - if( dbs.count( port ) != 1 ) { - cout << "No db started on port: " << port << endl; - return 0; - } - pid = dbs[ port ].first; - } else { - pid = _pid; - } - - kill_wrapper( pid, signal, port ); - - int i = 0; - for( ; i < 65; ++i ) { - if ( i == 5 ) { - char now[64]; - time_t_to_String(time(0), now); - now[ 20 ] = 0; - cout << now << " process on port " << port << ", with pid " << pid << " not terminated, sending sigkill" << endl; - kill_wrapper( pid, SIGKILL, port ); - } - if(wait_for_pid(pid, false, &exitCode)) - break; - sleepms( 1000 ); - } - if ( i == 65 ) { - char now[64]; - time_t_to_String(time(0), now); - now[ 20 ] = 0; - cout << now << " failed to terminate process on port " << port << ", with pid " << pid << endl; - assert( "Failed to terminate process" == 0 ); - } - - if ( port > 0 ) { - close( dbs[ port ].second ); - dbs.erase( port ); - } else { - close( shells[ pid ] ); - shells.erase( pid ); - } - if ( i > 4 || signal == SIGKILL ) { - sleepms( 4000 ); // allow operating system to reclaim resources - } - - return exitCode; - } - - int getSignal( const BSONObj &a ) { - int ret = SIGTERM; - if ( a.nFields() == 2 ) { - BSONObjIterator i( a ); - i.next(); - BSONElement e = i.next(); - assert( e.isNumber() ); - ret = int( e.number() ); - } - return ret; - } - - BSONObj StopMongoProgram( const BSONObj &a ) { - assert( a.nFields() == 1 || a.nFields() == 2 ); - assert( a.firstElement().isNumber() ); - int port = int( a.firstElement().number() ); - int code = killDb( port, 0, getSignal( a ) ); - cout << "shell: stopped mongo program on port " << port << endl; - return BSON( "" << code ); - } - - BSONObj StopMongoProgramByPid( const BSONObj &a ) { - assert( a.nFields() == 1 || a.nFields() == 2 ); - assert( a.firstElement().isNumber() ); - int pid = int( a.firstElement().number() ); - int code = killDb( 0, pid, getSignal( a ) ); - cout << "shell: stopped mongo program on pid " << pid << endl; - return BSON( "" << code ); - } - - void KillMongoProgramInstances() { - vector< int > ports; - for( map< int, pair< pid_t, int > >::iterator i = dbs.begin(); i != dbs.end(); ++i ) - ports.push_back( i->first ); - for( vector< int >::iterator i = ports.begin(); i != ports.end(); ++i ) - killDb( *i, 0, SIGTERM ); - vector< pid_t > pids; - for( map< pid_t, int >::iterator i = shells.begin(); i != shells.end(); ++i ) - pids.push_back( i->first ); - for( vector< pid_t >::iterator i = pids.begin(); i != pids.end(); ++i ) - killDb( 0, *i, SIGTERM ); - } -#else // ndef MONGO_SAFE_SHELL - void KillMongoProgramInstances() {} -#endif - - MongoProgramScope::~MongoProgramScope() { - DESTRUCTOR_GUARD( - KillMongoProgramInstances(); - ClearRawMongoProgramOutput( BSONObj() ); - ) - } - - unsigned _randomSeed; - - BSONObj JSSrand( const BSONObj &a ) { - uassert( 12518, "srand requires a single numeric argument", - a.nFields() == 1 && a.firstElement().isNumber() ); - _randomSeed = (unsigned)a.firstElement().numberLong(); // grab least significant digits - return undefined_; - } - - BSONObj JSRand( const BSONObj &a ) { - uassert( 12519, "rand accepts no arguments", a.nFields() == 0 ); - unsigned r; -#if !defined(_WIN32) - r = rand_r( &_randomSeed ); -#else - r = rand(); // seed not used in this case -#endif - return BSON( "" << double( r ) / ( double( RAND_MAX ) + 1 ) ); - } - - BSONObj isWindows(const BSONObj& a){ - uassert( 13006, "isWindows 
accepts no arguments", a.nFields() == 0 ); -#ifdef _WIN32 - return BSON( "" << true ); -#else - return BSON( "" << false ); -#endif - } - - void installShellUtils( Scope& scope ){ - scope.injectNative( "sleep" , JSSleep ); - scope.injectNative( "quit", Quit ); - scope.injectNative( "getMemInfo" , JSGetMemInfo ); - scope.injectNative( "_srand" , JSSrand ); - scope.injectNative( "_rand" , JSRand ); - scope.injectNative( "_isWindows" , isWindows ); - -#ifndef MONGO_SAFE_SHELL - //can't launch programs - scope.injectNative( "_startMongoProgram", StartMongoProgram ); - scope.injectNative( "runProgram", RunProgram ); - scope.injectNative( "runMongoProgram", RunMongoProgram ); - scope.injectNative( "stopMongod", StopMongoProgram ); - scope.injectNative( "stopMongoProgram", StopMongoProgram ); - scope.injectNative( "stopMongoProgramByPid", StopMongoProgramByPid ); - scope.injectNative( "rawMongoProgramOutput", RawMongoProgramOutput ); - scope.injectNative( "clearRawMongoProgramOutput", ClearRawMongoProgramOutput ); - - //can't access filesystem - scope.injectNative( "removeFile" , removeFile ); - scope.injectNative( "listFiles" , listFiles ); - scope.injectNative( "resetDbpath", ResetDbpath ); - scope.injectNative( "copyDbpath", CopyDbpath ); -#endif - } - - void initScope( Scope &scope ) { - scope.externalSetup(); - mongo::shellUtils::installShellUtils( scope ); - scope.execSetup( jsconcatcode_server , "setupServerCode" ); - - if ( !_dbConnect.empty() ) { - uassert( 12513, "connect failed", scope.exec( _dbConnect , "(connect)" , false , true , false ) ); - if ( !_dbAuth.empty() ) { - installGlobalUtils( scope ); - uassert( 12514, "login failed", scope.exec( _dbAuth , "(auth)" , true , true , false ) ); - } - } - } - - map< const void*, string > _allMyUris; - bool _nokillop = false; - void onConnect( DBClientWithCommands &c ) { - if ( _nokillop ) { - return; - } - BSONObj info; - if ( c.runCommand( "admin", BSON( "whatsmyuri" << 1 ), info ) ) { - // There's no way to explicitly disconnect a DBClientConnection, but we might allocate - // a new uri on automatic reconnect. So just store one uri per connection. - _allMyUris[ &c ] = info[ "you" ].str(); - } - } - } -} diff -Nru mongodb-1.4.4/shell/utils.js mongodb-1.6.3/shell/utils.js --- mongodb-1.4.4/shell/utils.js 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/shell/utils.js 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,5 @@ - __quiet = false; +__magicNoPrint = { __magicNoPrint : 1111 } chatty = function(s){ if ( ! __quiet ) @@ -16,9 +16,11 @@ return false; } - -doassert = function( msg ){ - print( "assert: " + msg ); +doassert = function (msg) { + if (msg.indexOf("assert") == 0) + print(msg); + else + print("assert: " + msg); throw msg; } @@ -28,7 +30,11 @@ if ( b ) return; - doassert( "assert failed : " + msg ); + doassert( msg == undefined ? 
"assert failed" : "assert failed : " + msg ); +} + +assert.automsg = function( b ) { + assert( eval( b ), b ); } assert._debug = false; @@ -45,6 +51,10 @@ doassert( "[" + tojson( a ) + "] != [" + tojson( b ) + "] are not equal : " + msg ); } +assert.eq.automsg = function( a, b ) { + assert.eq( eval( a ), eval( b ), "[" + a + "] != [" + b + "]" ); +} + assert.neq = function( a , b , msg ){ if ( assert._debug && msg ) print( "in assert for: " + msg ); if ( a != b ) @@ -53,6 +63,30 @@ doassert( "[" + a + "] != [" + b + "] are equal : " + msg ); } +assert.repeat = function( f, msg, timeout, interval ) { + if ( assert._debug && msg ) print( "in assert for: " + msg ); + + var start = new Date(); + timeout = timeout || 30000; + interval = interval || 200; + var last; + while( 1 ) { + + if ( typeof( f ) == "string" ){ + if ( eval( f ) ) + return; + } + else { + if ( f() ) + return; + } + + if ( ( new Date() ).getTime() - start.getTime() > timeout ) + break; + sleep( interval ); + } +} + assert.soon = function( f, msg, timeout, interval ) { if ( assert._debug && msg ) print( "in assert for: " + msg ); @@ -89,6 +123,10 @@ doassert( "did not throw exception: " + msg ); } +assert.throws.automsg = function( func, params ) { + assert.throws( func, params, func.toString() ); +} + assert.commandWorked = function( res , msg ){ if ( assert._debug && msg ) print( "in assert for: " + msg ); @@ -132,6 +170,23 @@ doassert( a + " is not greater than " + b + " : " + msg ); } +assert.lte = function( a , b , msg ){ + if ( assert._debug && msg ) print( "in assert for: " + msg ); + + if ( a <= b ) + return; + doassert( a + " is not less than or eq " + b + " : " + msg ); +} + +assert.gte = function( a , b , msg ){ + if ( assert._debug && msg ) print( "in assert for: " + msg ); + + if ( a >= b ) + return; + doassert( a + " is not greater than or eq " + b + " : " + msg ); +} + + assert.close = function( a , b , msg , places ){ if (places === undefined) { places = 4; @@ -304,6 +359,14 @@ return ret; } +if ( ! NumberLong.prototype ) { + NumberLong.prototype = {} +} + +NumberLong.prototype.tojson = function() { + return this.toString(); +} + if ( ! 
ObjectId.prototype ) ObjectId.prototype = {} @@ -317,6 +380,14 @@ ObjectId.prototype.isObjectId = true; +ObjectId.prototype.getTimestamp = function(){ + return new Date(parseInt(this.toString().slice(0,8), 16)*1000); +} + +ObjectId.prototype.equals = function( other){ + return this.str == other.str; +} + if ( typeof( DBPointer ) != "undefined" ){ DBPointer.prototype.fetch = function(){ assert( this.ns , "need a ns" ); @@ -366,12 +437,19 @@ } if ( typeof( BinData ) != "undefined" ){ - BinData.prototype.tojson = function(){ - return "BinData type: " + this.type + " len: " + this.len; + BinData.prototype.tojson = function () { + //return "BinData type: " + this.type + " len: " + this.len; + return this.toString(); } } else { - print( "warning: no BinData" ); + print( "warning: no BinData class" ); +} + +if ( typeof( UUID ) != "undefined" ){ + UUID.prototype.tojson = function () { + return this.toString(); + } } if ( typeof _threadInject != "undefined" ){ @@ -489,7 +567,9 @@ "jstests/indexb.js", "jstests/profile1.js", "jstests/mr3.js", - "jstests/apitest_db.js"] ); + "jstests/indexh.js", + "jstests/apitest_db.js", + "jstests/evalb.js"] ); // some tests can't be run in parallel with each other var serialTestsArr = [ "jstests/fsync.js", @@ -505,8 +585,8 @@ files.forEach( function(x) { - if ( /_runner/.test(x.name) || - /_lodeRunner/.test(x.name) || + if ( ( /[\/\\]_/.test(x.name) ) || + ( ! /\.js$/.test(x.name ) ) || ( x.name in skipTests ) || ( x.name in serialTests ) || ! /\.js$/.test(x.name ) ){ @@ -587,6 +667,10 @@ } } +tojsononeline = function( x ){ + return tojson( x , " " , true ); +} + tojson = function( x, indent , nolint ){ if ( x === null ) return "null"; @@ -597,24 +681,34 @@ if (!indent) indent = ""; - switch ( typeof x ){ - + switch ( typeof x ) { case "string": { var s = "\""; for ( var i=0; i= 1ms"); - print( "\t" + "use set curent database to " ); - print( "\t" + "db.help() help on DB methods"); - print( "\t" + "db.foo.help() help on collection methods"); - print( "\t" + "db.foo.find() list objects in collection foo" ); - print( "\t" + "db.foo.find( { a : 1 } ) list objects in foo where a == 1" ); - print( "\t" + "it result of the last line evaluated; use to further iterate"); -} - shellHelper.use = function( dbname ){ db = db.getMongo().getDB( dbname ); print( "switched to db " + db.getName() ); @@ -884,6 +975,7 @@ if ( typeof( gc ) == "undefined" ){ gc = function(){ + print( "warning: using noop gc()" ); } } @@ -953,3 +1045,108 @@ return Math.sqrt( Math.pow( by - ay , 2 ) + Math.pow( bx - ax , 2 ) ); } + +rs = function () { return "try rs.help()"; } + +rs.help = function () { + print("\trs.status() { replSetGetStatus : 1 } checks repl set status"); + print("\trs.initiate() { replSetInitiate : null } initiates set with default settings"); + print("\trs.initiate(cfg) { replSetInitiate : cfg } initiates set with configuration cfg"); + print("\trs.add(hostportstr) add a new member to the set with default attributes"); + print("\trs.add(membercfgobj) add a new member to the set with extra attributes"); + print("\trs.addArb(hostportstr) add a new member which is arbiterOnly:true"); + print("\trs.stepDown() step down as primary (momentarily)"); + print("\trs.conf() return configuration from local.system.replset"); + print("\trs.slaveOk() shorthand for db.getMongo().setSlaveOk()"); + print(); + print("\tdb.isMaster() check who is primary"); + print(); + print("\tsee also http://:28017/_replSet for additional diagnostic info"); +} +rs.slaveOk = function () { return 
db.getMongo().setSlaveOk(); }
+rs.status = function () { return db._adminCommand("replSetGetStatus"); }
+rs.isMaster = function () { return db.isMaster(); }
+rs.initiate = function (c) { return db._adminCommand({ replSetInitiate: c }); }
+rs.add = function (hostport, arb) {
+    var cfg = hostport;
+
+    var local = db.getSisterDB("local");
+    assert(local.system.replset.count() <= 1, "error: local.system.replset has unexpected contents");
+    var c = local.system.replset.findOne();
+    assert(c, "no config object retrievable from local.system.replset");
+    c.version++;
+    var max = 0;
+    for (var i in c.members)
+        if (c.members[i]._id > max) max = c.members[i]._id;
+    if (isString(hostport)) {
+        cfg = { _id: max + 1, host: hostport };
+        if (arb)
+            cfg.arbiterOnly = true;
+    }
+    c.members.push(cfg);
+    return db._adminCommand({ replSetReconfig: c });
+}
+rs.stepDown = function () { return db._adminCommand({ replSetStepDown:true}); }
+rs.addArb = function (hn) { return this.add(hn, true); }
+rs.conf = function () { return db.getSisterDB("local").system.replset.findOne(); }
+
+help = shellHelper.help = function (x) {
+    if (x == "connect") {
+        print("\nNormally one specifies the server on the mongo shell command line. Run mongo --help to see those options.");
+        print("Additional connections may be opened:\n");
+        print("    var x = new Mongo('host[:port]');");
+        print("    var mydb = x.getDB('mydb');");
+        print("  or");
+        print("    var mydb = connect('host[:port]/mydb');");
+        print("\nNote: the REPL prompt only auto-reports getLastError() for the shell command line connection.\n");
+        return;
+    }
+    if (x == "misc") {
+        print("\tb = new BinData(subtype,base64str)   create a BSON BinData value");
+        print("\tb.subtype()                          the BinData subtype (0..255)");
+        print("\tb.length()                           length of the BinData data in bytes");
+        print("\tb.hex()                              the data as a hex encoded string");
+        print("\tb.base64()                           the data as a base 64 encoded string");
+        print("\tb.toString()");
+        return;
+    }
+    if (x == "admin") {
+        print("\tls([path])                 list files");
+        print("\tpwd()                      returns current directory");
+        print("\tlistFiles([path])          returns file list");
+        print("\thostname()                 returns name of this host");
+        print("\tcat(fname)                 returns contents of text file as a string");
+        print("\tremoveFile(f)              delete a file");
+        print("\tload(jsfilename)           load and execute a .js file");
+        print("\trun(program[, args...])    spawn a program and wait for its completion");
+        print("\tsleep(m)                   sleep m milliseconds");
+        print("\tgetMemInfo()               diagnostic");
+        return;
+    }
+    if (x == "test") {
+        print("\tstartMongodEmpty(args)     DELETES DATA DIR and then starts mongod");
+        print("\t                           returns a connection to the new server");
+        print("\tstartMongodTest()          DELETES DATA DIR");
+        print("\t                           automatically picks port #s starting at 27000 and increasing");
+        print("\t                           or you can specify the port as the first arg");
+        print("\t                           dir is /data/db/<port>/ if not specified as the 2nd arg");
+        print("\t                           returns a connection to the new server");
+        return;
+    }
+    print("\t" + "db.help()                    help on db methods");
+    print("\t" + "db.mycoll.help()             help on collection methods");
+    print("\t" + "rs.help()                    help on replica set methods");
+    print("\t" + "help connect                 connecting to a db help");
+    print("\t" + "help admin                   administrative help");
+    print("\t" + "help misc                    misc things to know");
+    print();
+    print("\t" + "show dbs                     show database names");
+    print("\t" + "show collections             show collections in current database");
+    print("\t" + "show users                   show users in current database");
+    print("\t" + "show profile                 show most recent
system.profile entries with time >= 1ms"); + print("\t" + "use set current database"); + print("\t" + "db.foo.find() list objects in collection foo"); + print("\t" + "db.foo.find( { a : 1 } ) list objects in foo where a == 1"); + print("\t" + "it result of the last line evaluated; use to further iterate"); + print("\t" + "exit quit the mongo shell"); +} diff -Nru mongodb-1.4.4/stdafx.cpp mongodb-1.6.3/stdafx.cpp --- mongodb-1.4.4/stdafx.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/stdafx.cpp 1969-12-31 16:00:00.000000000 -0800 @@ -1,37 +0,0 @@ -// stdafx.cpp : source file that includes just the standard includes - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "stdafx.h" - -#if defined( __MSVC__ ) -// should probably check VS version here -#elif defined( __GNUC__ ) - -#if __GNUC__ < 4 -#error gcc < 4 not supported -#endif - -#else -// unknown compiler -#endif - - -namespace mongo { - - const char versionString[] = "1.4.4"; - -} // namespace mongo diff -Nru mongodb-1.4.4/stdafx.h mongodb-1.6.3/stdafx.h --- mongodb-1.4.4/stdafx.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/stdafx.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,157 +0,0 @@ -// stdafx.h : include file for standard system include files, -// or project specific include files that are used frequently, but -// are changed infrequently -// - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace mongo { - - using namespace std; - -#define NOMINMAX - -#if defined(_WIN32) - const bool debug=true; -#else - const bool debug=false; -#endif - - // pdfile versions - const int VERSION = 4; - const int VERSION_MINOR = 5; - - // mongo version - extern const char versionString[]; - - enum ExitCode { - EXIT_CLEAN = 0 , - EXIT_BADOPTIONS = 2 , - EXIT_REPLICATION_ERROR = 3 , - EXIT_NEED_UPGRADE = 4 , - EXIT_KILL = 12 , - EXIT_ABRUBT = 14 , - EXIT_NTSERVICE_ERROR = 20 , - EXIT_JAVA = 21 , - EXIT_OOM_MALLOC = 42 , - EXIT_OOM_REALLOC = 43 , - EXIT_FS = 45 , - EXIT_CLOCK_SKEW = 47 , - EXIT_POSSIBLE_CORRUPTION = 60 , // this means we detected a possible corruption situation, like a buf overflow - EXIT_UNCAUGHT = 100 , // top level exception that wasn't caught - EXIT_TEST = 101 , - - }; - - void dbexit( ExitCode returnCode, const char *whyMsg = ""); - - /** - this is here so you can't just type exit() to quit the program - you should either use dbexit to shutdown cleanly, or ::exit to tell the system to quiy - if you use this, you'll get a link error since mongo::exit isn't defined - */ - void exit( ExitCode returnCode ); - bool inShutdown(); - -} // namespace mongo - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "targetver.h" -#include "time.h" -#include "string.h" -#include "limits.h" - -///using namespace std; - -#undef yassert -#include -#include -#include -#include -#include -#include -#include -#include -#define BOOST_SPIRIT_THREADSAFE - -#include - -#if BOOST_VERSION >= 103800 -#define BOOST_SPIRIT_USE_OLD_NAMESPACE -#include -#include -#include -#else -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#undef assert -#define assert xassert -#define yassert 1 - -namespace mongo { - using namespace boost::filesystem; -} - -#include "util/debug_util.h" -#include "util/goodies.h" -#include "util/log.h" -#include "util/allocator.h" -#include "util/assert_util.h" - -namespace mongo { - - void sayDbContext(const char *msg = 0); - void rawOut( const string &s ); - -} // namespace mongo - -namespace mongo { - - const char * gitVersion(); - const char * sysInfo(); - string mongodVersion(); - - void printGitVersion(); - void printSysInfo(); - - typedef char _TCHAR; - -#define null (0) - -} // namespace mongo diff -Nru mongodb-1.4.4/tools/bridge.cpp mongodb-1.6.3/tools/bridge.cpp --- mongodb-1.4.4/tools/bridge.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/bridge.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,10 @@ * along with this program. If not, see . 
 */
-#include "stdafx.h"
+#include "pch.h"
 #include "../util/message.h"
 #include "../client/dbclient.h"
+#include "../db/dbmessage.h"
 
 using namespace mongo;
 using namespace std;
 
@@ -44,11 +45,28 @@
                 break;
             }
 
-            int oldId = m.data->id;
-            if ( m.data->operation() == dbQuery || m.data->operation() == dbMsg || m.data->operation() == dbGetMore ) {
+            int oldId = m.header()->id;
+            if ( m.operation() == dbQuery || m.operation() == dbMsg || m.operation() == dbGetMore ) {
+                bool exhaust = false;
+                if ( m.operation() == dbQuery ) {
+                    DbMessage d( m );
+                    QueryMessage q( d );
+                    exhaust = q.queryOptions & QueryOption_Exhaust;
+                }
                 Message response;
                 dest.port().call( m, response );
                 mp_.reply( m, response, oldId );
+                while ( exhaust ) {
+                    MsgData *header = response.header();
+                    QueryResult *qr = (QueryResult *) header;
+                    if ( qr->cursorId ) {
+                        response.reset();
+                        dest.port().recv( response );
+                        mp_.reply( m, response ); // m argument is ignored anyway
+                    } else {
+                        exhaust = false;
+                    }
+                }
             } else {
                 dest.port().say( m, oldId );
             }
@@ -74,7 +92,7 @@
 
 #if !defined(_WIN32)
 void cleanup( int sig ) {
-    close( listener->socket() );
+    ListeningSockets::get()->closeAll();
    for ( set<MessagingPort*>::iterator i = ports.begin(); i != ports.end(); i++ )
        (*i)->shutdown();
    ::exit( 0 );
@@ -125,8 +143,7 @@
     check( port != 0 && !destUri.empty() );
 
     listener.reset( new MyListener( port ) );
-    listener->init();
-    listener->listen();
+    listener->initAndListen();
 
     return 0;
 }
diff -Nru mongodb-1.4.4/tools/bsondump.cpp mongodb-1.6.3/tools/bsondump.cpp
--- mongodb-1.4.4/tools/bsondump.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/tools/bsondump.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,132 @@
+// restore.cpp
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see .
+*/
+
+#include "../pch.h"
+#include "../client/dbclient.h"
+#include "../util/mmap.h"
+#include "../util/text.h"
+#include "tool.h"
+
+#include
+
+#include
+
+using namespace mongo;
+
+namespace po = boost::program_options;
+
+class BSONDump : public BSONTool {
+
+    enum OutputType { JSON , DEBUG } _type;
+
+public:
+
+    BSONDump() : BSONTool( "bsondump" ){
+        add_options()
+            ("type" , po::value<string>()->default_value("json") , "type of output: json,debug" )
+            ;
+        add_hidden_options()
+            ("file" , po::value<string>() , ".bson file" )
+            ;
+        addPositionArg( "file" , 1 );
+        _noconnection = true;
+    }
+
+    virtual void printExtraHelp(ostream& out) {
+        out << "usage: " << _name << " [options] " << endl;
+    }
+
+    virtual int doRun(){
+        {
+            string t = getParam( "type" );
+            if ( t == "json" )
+                _type = JSON;
+            else if ( t == "debug" )
+                _type = DEBUG;
+            else {
+                cerr << "bad type: " << t << endl;
+                return 1;
+            }
+        }
+        processFile( getParam( "file" ) );
+        return 0;
+    }
+
+    bool debug( const BSONObj& o , int depth=0){
+        string prefix = "";
+        for ( int i=0; i<depth; i++ ){
+            prefix += "\t\t\t";
+        }
+
+        int read = 4;
+
+        try {
+            cout << prefix << "--- new object ---\n";
+            cout << prefix << "\t size : " << o.objsize() << "\n";
+            BSONObjIterator i(o);
+            while ( i.more() ){
+                BSONElement e = i.next();
+                cout << prefix << "\t\t " << e.fieldName() << "\n" << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size() << endl;
+                if ( ( read + e.size() ) > o.objsize() ){
+                    cout << prefix << " SIZE DOES NOT WORK" << endl;
+                    return false;
+                }
+                read += e.size();
+                try {
+                    e.validate();
+                    if ( e.isABSONObj() ){
+                        if ( ! debug( e.Obj() , depth + 1 ) )
+                            return false;
+                    }
+                    else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ){
+                        cout << prefix << "\t\t\t" << "bad utf8 String!" << endl;
+                    }
+                    else if ( logLevel > 0 ){
+                        cout << prefix << "\t\t\t" << e << endl;
+                    }
+
+                }
+                catch ( std::exception& e ){
+                    cout << prefix << "\t\t\t bad value: " << e.what() << endl;
+                }
+            }
+        }
+        catch ( std::exception& e ){
+            cout << prefix << "\t" << e.what() << endl;
+        }
+        return true;
+    }
+
+    virtual void gotObject( const BSONObj& o ){
+        switch ( _type ){
+        case JSON:
+            cout << o << endl;
+            break;
+        case DEBUG:
+            debug(o);
+            break;
+        default:
+            cerr << "bad type? : " << _type << endl;
+        }
+    }
+};
+
+int main( int argc , char ** argv ) {
+    BSONDump dump;
+    return dump.main( argc , argv );
+}
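The debug walk above rests on one invariant: a BSON document is a 4-byte length, a run of elements, then a single trailing EOO byte, so the element sizes must tile objsize() exactly. A standalone sketch of just that check (include path and helper name are illustrative):

    #include "../bson/bson.h"
    #include <iostream>

    using namespace mongo;

    bool sizesTile(const BSONObj& o) {
        int read = 4;                          // leading int32 length
        BSONObjIterator it(o);
        while (it.more()) {
            BSONElement e = it.next();
            if (read + e.size() > o.objsize())
                return false;                  // element overruns the document
            read += e.size();
        }
        return read + 1 == o.objsize();        // +1 for the trailing EOO byte
    }

    int main() {
        BSONObj o = BSON( "a" << 1 << "b" << "hello" );
        std::cout << (sizesTile(o) ? "ok" : "corrupt") << std::endl;
        return 0;
    }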
diff -Nru mongodb-1.4.4/tools/dump.cpp mongodb-1.6.3/tools/dump.cpp
--- mongodb-1.4.4/tools/dump.cpp 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/tools/dump.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -16,7 +16,7 @@
  * along with this program. If not, see .
  */
 
-#include "../stdafx.h"
+#include "../pch.h"
 #include "../client/dbclient.h"
 #include "tool.h"
 
@@ -31,6 +31,7 @@
     Dump() : Tool( "dump" , true , "*" ){
         add_options()
             ("out,o", po::value<string>()->default_value("dump"), "output directory")
+            ("query,q", po::value<string>() , "json query" )
             ;
     }
 
@@ -39,11 +40,17 @@
 
         ofstream out;
         out.open( outputFile.string().c_str() , ios_base::out | ios_base::binary );
-        ASSERT_STREAM_GOOD( 10262 , "couldn't open file" , out );
+        assertStreamGood( 10262 , "couldn't open file" , out );
 
         ProgressMeter m( conn( true ).count( coll.c_str() , BSONObj() , QueryOption_SlaveOk ) );
 
-        auto_ptr<DBClientCursor> cursor = conn( true ).query( coll.c_str() , Query().snapshot() , 0 , 0 , 0 , QueryOption_SlaveOk | QueryOption_NoCursorTimeout );
+        Query q;
+        if ( _query.isEmpty() )
+            q.snapshot();
+        else
+            q = _query;
+
+        auto_ptr<DBClientCursor> cursor = conn( true ).query( coll.c_str() , q , 0 , 0 , 0 , QueryOption_SlaveOk | QueryOption_NoCursorTimeout );
 
         while ( cursor->more() ) {
             BSONObj obj = cursor->next();
@@ -80,8 +87,14 @@
         }
     }
 
-
+
     int run(){
+
+        {
+            string q = getParam("query");
+            if ( q.size() )
+                _query = fromjson( q );
+        }
         path root( getParam("out") );
         string db = _db;
 
@@ -113,6 +126,7 @@
         return 0;
     }
 
+    BSONObj _query;
 };
 
 int main( int argc , char ** argv ) {
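One behavioral note on the dump.cpp hunk above: an explicit --query replaces, rather than combines with, snapshot mode. A sketch of that selection in isolation (pickQuery is a hypothetical name; include path illustrative):

    #include "../client/dbclient.h"

    mongo::Query pickQuery(const mongo::BSONObj& userQuery) {
        mongo::Query q;
        if (userQuery.isEmpty())
            q.snapshot();                      // stable iteration, no repeats from moved docs
        else
            q = mongo::Query(userQuery);       // user filter wins; no $snapshot applied
        return q;
    }

mongodump then hands the result straight to query() with the slave-ok and no-cursor-timeout flags, exactly as the hunk shows.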
-f name,age" )
             ("csv","export to csv instead of json")
             ("out,o", po::value<string>(), "output file; if not specified, stdout is used")
+            ("jsonArray", "output to a json array rather than one object per line")
             ;
+        _usesstdout = false;
     }
 
     int run(){
         string ns;
         const bool csv = hasParam( "csv" );
+        const bool jsonArray = hasParam( "jsonArray" );
         ostream *outPtr = &cout;
         string outfile = getParam( "out" );
         auto_ptr<ofstream> fileStream;
@@ -76,7 +79,7 @@
 
         auth();
 
-        if ( hasParam( "fields" ) ){
+        if ( hasParam( "fields" ) || csv ){
             needFields();
             fieldsToReturn = &_fieldsObj;
         }
@@ -99,6 +102,9 @@
             out << endl;
         }
 
+        if (jsonArray)
+            out << '[';
+
         long long num = 0;
         while ( cursor->more() ) {
             num++;
@@ -115,10 +121,18 @@
                 out << endl;
             }
             else {
-                out << obj.jsonString() << endl;
+                if (jsonArray && num != 1)
+                    out << ',';
+
+                out << obj.jsonString();
+
+                if (!jsonArray)
+                    out << endl;
             }
         }
 
+        if (jsonArray)
+            out << ']' << endl;
 
         cerr << "exported " << num << " records" << endl;
diff -Nru mongodb-1.4.4/tools/files.cpp mongodb-1.6.3/tools/files.cpp
--- mongodb-1.4.4/tools/files.cpp 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/tools/files.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -16,7 +16,7 @@
  * along with this program. If not, see .
  */
 
-#include "stdafx.h"
+#include "pch.h"
 #include "client/gridfs.h"
 #include "client/dbclient.h"
 
@@ -140,14 +140,14 @@
         }
 
         conn().getLastError();
-        cout << "done!";
+        cout << "done!" << endl;
         return 0;
     }
 
     if ( cmd == "delete" ){
         g.removeFile(filename);
         conn().getLastError();
-        cout << "done!";
+        cout << "done!" << endl;
         return 0;
     }
diff -Nru mongodb-1.4.4/tools/import.cpp mongodb-1.6.3/tools/import.cpp
--- mongodb-1.4.4/tools/import.cpp 2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/tools/import.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -16,11 +16,12 @@
  * along with this program. If not, see .
  */
 
-#include "stdafx.h"
+#include "pch.h"
 #include "client/dbclient.h"
 #include "db/json.h"
 #include "tool.h"
+#include "../util/text.h"
 
 #include
 #include
 
@@ -39,6 +40,10 @@
     const char * _sep;
     bool _ignoreBlanks;
     bool _headerLine;
+    bool _upsert;
+    bool _doimport;
+    bool _jsonArray;
+    vector<string> _upsertFields;
 
     void _append( BSONObjBuilder& b , const string& fieldName , const string& data ){
         if ( b.appendAsNumber( fieldName , data ) )
@@ -48,10 +53,12 @@
             return;
 
         // TODO: other types?
-        b.append( fieldName.c_str() , data );
+        b.append( fieldName , data );
     }
 
     BSONObj parseLine( char * line ){
+        uassert(13289, "Invalid UTF8 character detected", isValidUTF8(line));
+
        if ( _type == JSON ){
            char * end = ( line + strlen( line ) ) - 1;
            while ( isspace(*end) ){
@@ -137,11 +144,21 @@
             ("file",po::value<string>() , "file to import from; if not specified stdin is used" )
             ("drop", "drop collection first " )
             ("headerline","CSV,TSV only - use first line as headers")
+            ("upsert", "insert or update objects that already exist" )
+            ("upsertFields", po::value<string>(), "comma-separated fields for the query part of the upsert. You should make sure this is indexed" )
+            ("stopOnError", "stop importing at first error rather than continuing" )
+            ("jsonArray", "load a json array, not one item per line. Currently limited to 4MB." )
+            ;
+        add_hidden_options()
+            ("noimport", "don't actually import.
useful for benchmarking parser" ) ; addPositionArg( "file" , 1 ); _type = JSON; _ignoreBlanks = false; _headerLine = false; + _upsert = false; + _doimport = true; + _jsonArray = false; } int run(){ @@ -183,6 +200,21 @@ _ignoreBlanks = true; } + if ( hasParam( "upsert" ) ){ + _upsert = true; + + string uf = getParam("upsertFields"); + if (uf.empty()){ + _upsertFields.push_back("_id"); + } else { + StringSplitter(uf.c_str(), ",").split(_upsertFields); + } + } + + if ( hasParam( "noimport" ) ){ + _doimport = false; + } + if ( hasParam( "type" ) ){ string type = getParam( "type" ); if ( type == "json" ) @@ -207,6 +239,10 @@ needFields(); } + if (_type == JSON && hasParam("jsonArray")){ + _jsonArray = true; + } + int errors = 0; int num = 0; @@ -217,38 +253,91 @@ ProgressMeter pm( fileSize ); const int BUF_SIZE = 1024 * 1024 * 4; boost::scoped_array line(new char[BUF_SIZE+2]); - while ( *in ){ - char * buf = line.get(); - in->getline( buf , BUF_SIZE ); - uassert( 10263 , "unknown error reading file" , ( in->rdstate() & ios_base::badbit ) == 0 ); - log(1) << "got line:" << buf << endl; - - while( isspace( buf[0] ) ) buf++; - - int len = strlen( buf ); - if ( ! len ) - continue; - - buf[len+1] = 0; - - if ( in->rdstate() == ios_base::eofbit ) - break; - assert( in->rdstate() == 0 ); + char * buf = line.get(); + while ( _jsonArray || in->rdstate() == 0 ){ + if (_jsonArray){ + if (buf == line.get()){ //first pass + in->read(buf, BUF_SIZE); + uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit)); + buf[ in->gcount() ] = '\0'; + } + } else { + buf = line.get(); + in->getline( buf , BUF_SIZE ); + log(1) << "got line:" << buf << endl; + } + uassert( 10263 , "unknown error reading file" , + (!(in->rdstate() & ios_base::badbit)) && + (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) ); + + int len = 0; + if (strncmp("\xEF\xBB\xBF", buf, 3) == 0){ // UTF-8 BOM (notepad is stupid) + buf += 3; + len += 3; + } + + if (_jsonArray){ + while (buf[0] != '{' && buf[0] != '\0') { + len++; + buf++; + } + if (buf[0] == '\0') + break; + } else { + while (isspace( buf[0] )){ + len++; + buf++; + } + if (buf[0] == '\0') + continue; + len += strlen( buf ); + } try { - BSONObj o = parseLine( buf ); - if ( _headerLine ) + BSONObj o; + if (_jsonArray){ + int jslen; + o = fromjson(buf, &jslen); + len += jslen; + buf += jslen; + } else { + o = parseLine( buf ); + } + + if ( _headerLine ){ _headerLine = false; - else - conn().insert( ns.c_str() , o ); + } else if (_doimport) { + bool doUpsert = _upsert; + BSONObjBuilder b; + if (_upsert){ + for (vector::const_iterator it=_upsertFields.begin(), end=_upsertFields.end(); it!=end; ++it){ + BSONElement e = o.getFieldDotted(it->c_str()); + if (e.eoo()){ + doUpsert = false; + break; + } + b.appendAs(e, *it); + } + } + + if (doUpsert){ + conn().update(ns, Query(b.obj()), o, true); + } else { + conn().insert( ns.c_str() , o ); + } + } + + num++; } catch ( std::exception& e ){ cout << "exception:" << e.what() << endl; cout << buf << endl; errors++; + + if (hasParam("stopOnError") || _jsonArray) + break; } - num++; if ( pm.hit( len + 1 ) ){ cout << "\t\t\t" << num << "\t" << ( num / ( time(0) - start ) ) << "/second" << endl; } diff -Nru mongodb-1.4.4/tools/restore.cpp mongodb-1.6.3/tools/restore.cpp --- mongodb-1.4.4/tools/restore.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/restore.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * along with this program. If not, see . 
*/ -#include "../stdafx.h" +#include "../pch.h" #include "../client/dbclient.h" #include "../util/mmap.h" #include "tool.h" @@ -29,16 +29,17 @@ namespace po = boost::program_options; -class Restore : public Tool { +class Restore : public BSONTool { public: bool _drop; - bool _objcheck; - - Restore() : Tool( "restore" , true , "" , "" ) , _drop(false),_objcheck(false){ + bool _indexesLast; + const char * _curns; + + Restore() : BSONTool( "restore" ) , _drop(false){ add_options() ("drop" , "drop each collection before import" ) - ("objcheck" , "validate object before inserting" ) + ("indexesLast" , "wait to add indexes (faster if data isn't inserted in index order)" ) ; add_hidden_options() ("dir", po::value()->default_value("dump"), "directory to restore from") @@ -50,11 +51,11 @@ out << "usage: " << _name << " [options] [directory or filename to restore from]" << endl; } - int run(){ + virtual int doRun(){ auth(); path root = getParam("dir"); _drop = hasParam( "drop" ); - _objcheck = hasParam( "objcheck" ); + _indexesLast = hasParam("indexesLast"); /* If _db is not "" then the user specified a db name to restore as. * @@ -76,6 +77,7 @@ if ( is_directory( root ) ) { directory_iterator end; directory_iterator i(root); + path indexes; while ( i != end ) { path p = *i; i++; @@ -98,8 +100,15 @@ } } - drillDown(p, use_db, use_coll); + if ( _indexesLast && p.leaf() == "system.indexes.bson" ) + indexes = p; + else + drillDown(p, use_db, use_coll); } + + if (!indexes.empty()) + drillDown(indexes, use_db, use_coll); + return; } @@ -109,18 +118,29 @@ return; } - out() << root.string() << endl; + log() << root.string() << endl; + + if ( root.leaf() == "system.profile.bson" ){ + log() << "\t skipping" << endl; + return; + } string ns; if (use_db) { ns += _db; - } else { + } + else { string dir = root.branch_path().string(); if ( dir.find( "/" ) == string::npos ) ns += dir; else ns += dir.substr( dir.find_last_of( "/" ) + 1 ); + + if ( ns.size() == 0 ) + ns = "test"; } + + assert( ns.size() ); if (use_coll) { ns += "." + _coll; @@ -130,76 +150,22 @@ ns += "." + l; } - long long fileLength = file_size( root ); - - if ( fileLength == 0 ) { - out() << "file " + root.native_file_string() + " empty, skipping" << endl; - return; - } - out() << "\t going into namespace [" << ns << "]" << endl; if ( _drop ){ out() << "\t dropping" << endl; conn().dropCollection( ns ); } + + _curns = ns.c_str(); + processFile( root ); + } - string fileString = root.string(); - ifstream file( fileString.c_str() , ios_base::in | ios_base::binary); - if ( ! file.is_open() ){ - log() << "error opening file: " << fileString << endl; - return; - } - - log(1) << "\t file size: " << fileLength << endl; - - long long read = 0; - long long num = 0; - - const int BUF_SIZE = 1024 * 1024 * 5; - boost::scoped_array buf_holder(new char[BUF_SIZE]); - char * buf = buf_holder.get(); - - ProgressMeter m( fileLength ); - - while ( read < fileLength ) { - file.read( buf , 4 ); - int size = ((int*)buf)[0]; - if ( size >= BUF_SIZE ){ - cerr << "got an object of size: " << size << " terminating..." << endl; - } - uassert( 10264 , "invalid object size" , size < BUF_SIZE ); - - file.read( buf + 4 , size - 4 ); - - BSONObj o( buf ); - if ( _objcheck && ! o.valid() ){ - cerr << "INVALID OBJECT - going try and pring out " << endl; - cerr << "size: " << size << endl; - BSONObjIterator i(o); - while ( i.more() ){ - BSONElement e = i.next(); - try { - e.validate(); - } - catch ( ... 
){ - cerr << "\t\t NEXT ONE IS INVALID" << endl; - } - cerr << "\t name : " << e.fieldName() << " " << e.type() << endl; - cerr << "\t " << e << endl; - } - } - conn().insert( ns.c_str() , o ); - - read += o.objsize(); - num++; - - m.hit( o.objsize() ); - } - - uassert( 10265 , "counts don't match" , m.done() == fileLength ); - out() << "\t " << m.hits() << " objects" << endl; + virtual void gotObject( const BSONObj& obj ){ + conn().insert( _curns , obj ); } + + }; int main( int argc , char ** argv ) { diff -Nru mongodb-1.4.4/tools/sniffer.cpp mongodb-1.6.3/tools/sniffer.cpp --- mongodb-1.4.4/tools/sniffer.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/sniffer.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -34,7 +34,7 @@ #undef max #endif -#include "../util/builder.h" +#include "../bson/util/builder.h" #include "../util/message.h" #include "../util/mmap.h" #include "../db/dbmessage.h" @@ -74,6 +74,10 @@ int captureHeaderSize; set serverPorts; string forwardAddress; +bool objcheck = false; + +ostream *outPtr = &cout; +ostream &out() { return *outPtr; } /* IP header */ struct sniff_ip { @@ -205,23 +209,23 @@ if ( bytesRemainingInMessage[ c ] == 0 ) { m.setData( (MsgData*)payload , false ); - if ( !m.data->valid() ) { + if ( !m.header()->valid() ) { cerr << "Invalid message start, skipping packet." << endl; return; } - if ( size_payload > m.data->len ) { + if ( size_payload > m.header()->len ) { cerr << "Multiple messages in packet, skipping packet." << endl; return; } - if ( size_payload < m.data->len ) { - bytesRemainingInMessage[ c ] = m.data->len - size_payload; + if ( size_payload < m.header()->len ) { + bytesRemainingInMessage[ c ] = m.header()->len - size_payload; messageBuilder[ c ].reset( new BufBuilder() ); - messageBuilder[ c ]->append( (void*)payload, size_payload ); + messageBuilder[ c ]->appendBuf( (void*)payload, size_payload ); return; } } else { bytesRemainingInMessage[ c ] -= size_payload; - messageBuilder[ c ]->append( (void*)payload, size_payload ); + messageBuilder[ c ]->appendBuf( (void*)payload, size_payload ); if ( bytesRemainingInMessage[ c ] < 0 ) { cerr << "Received too many bytes to complete message, resetting buffer" << endl; bytesRemainingInMessage[ c ] = 0; @@ -237,84 +241,103 @@ DbMessage d( m ); - cout << inet_ntoa(ip->ip_src) << ":" << ntohs( tcp->th_sport ) - << ( serverPorts.count( ntohs( tcp->th_dport ) ) ? " -->> " : " <<-- " ) - << inet_ntoa(ip->ip_dst) << ":" << ntohs( tcp->th_dport ) - << " " << d.getns() - << " " << m.data->len << " bytes " - << " id:" << hex << m.data->id << dec << "\t" << m.data->id; + out() << inet_ntoa(ip->ip_src) << ":" << ntohs( tcp->th_sport ) + << ( serverPorts.count( ntohs( tcp->th_dport ) ) ? 
" -->> " : " <<-- " ) + << inet_ntoa(ip->ip_dst) << ":" << ntohs( tcp->th_dport ) + << " " << d.getns() + << " " << m.header()->len << " bytes " + << " id:" << hex << m.header()->id << dec << "\t" << m.header()->id; processMessage( c , m ); } +class AuditingDbMessage : public DbMessage { +public: + AuditingDbMessage( const Message &m ) : DbMessage( m ) {} + BSONObj nextJsObj( const char *context ) { + BSONObj ret = DbMessage::nextJsObj(); + if ( objcheck && !ret.valid() ) { + // TODO provide more debugging info + cout << "invalid object in " << context << ": " << ret.hexDump() << endl; + } + return ret; + } +}; + void processMessage( Connection& c , Message& m ){ - DbMessage d(m); + AuditingDbMessage d(m); + + if ( m.operation() == mongo::opReply ) + out() << " - " << (unsigned)m.header()->responseTo; + out() << endl; - if ( m.data->operation() == mongo::opReply ) - cout << " - " << m.data->responseTo; - cout << endl; - - switch( m.data->operation() ){ - case mongo::opReply:{ - mongo::QueryResult* r = (mongo::QueryResult*)m.data; - cout << "\treply" << " n:" << r->nReturned << " cursorId: " << r->cursorId << endl; - if ( r->nReturned ){ - mongo::BSONObj o( r->data() , 0 ); - cout << "\t" << o << endl; + try { + switch( m.operation() ){ + case mongo::opReply:{ + mongo::QueryResult* r = (mongo::QueryResult*)m.singleData(); + out() << "\treply" << " n:" << r->nReturned << " cursorId: " << r->cursorId << endl; + if ( r->nReturned ){ + mongo::BSONObj o( r->data() , 0 ); + out() << "\t" << o << endl; + } + break; + } + case mongo::dbQuery:{ + mongo::QueryMessage q(d); + out() << "\tquery: " << q.query << " ntoreturn: " << q.ntoreturn << " ntoskip: " << q.ntoskip << endl; + break; + } + case mongo::dbUpdate:{ + int flags = d.pullInt(); + BSONObj q = d.nextJsObj( "update" ); + BSONObj o = d.nextJsObj( "update" ); + out() << "\tupdate flags:" << flags << " q:" << q << " o:" << o << endl; + break; + } + case mongo::dbInsert:{ + out() << "\tinsert: " << d.nextJsObj( "insert" ) << endl; + while ( d.moreJSObjs() ) { + out() << "\t\t" << d.nextJsObj( "insert" ) << endl; + } + break; + } + case mongo::dbGetMore:{ + int nToReturn = d.pullInt(); + long long cursorId = d.pullInt64(); + out() << "\tgetMore nToReturn: " << nToReturn << " cursorId: " << cursorId << endl; + break; + } + case mongo::dbDelete:{ + int flags = d.pullInt(); + BSONObj q = d.nextJsObj( "delete" ); + out() << "\tdelete flags: " << flags << " q: " << q << endl; + break; + } + case mongo::dbKillCursors:{ + int *x = (int *) m.singleData()->_data; + x++; // reserved + int n = *x; + out() << "\tkillCursors n: " << n << endl; + break; + } + default: + cerr << "*** CANNOT HANDLE TYPE: " << m.operation() << endl; } - break; - } - case mongo::dbQuery:{ - mongo::QueryMessage q(d); - cout << "\tquery: " << q.query << " ntoreturn: " << q.ntoreturn << " ntoskip: " << q.ntoskip << endl; - break; - } - case mongo::dbUpdate:{ - int flags = d.pullInt(); - BSONObj q = d.nextJsObj(); - BSONObj o = d.nextJsObj(); - cout << "\tupdate flags:" << flags << " q:" << q << " o:" << o << endl; - break; - } - case mongo::dbInsert:{ - cout << "\tinsert: " << d.nextJsObj() << endl; - while ( d.moreJSObjs() ) - cout << "\t\t" << d.nextJsObj() << endl; - break; - } - case mongo::dbGetMore:{ - int nToReturn = d.pullInt(); - long long cursorId = d.pullInt64(); - cout << "\tgetMore nToReturn: " << nToReturn << " cursorId: " << cursorId << endl; - break; - } - case mongo::dbDelete:{ - int flags = d.pullInt(); - BSONObj q = d.nextJsObj(); - cout << "\tdelete flags: " << 
flags << " q: " << q << endl; - break; - } - case mongo::dbKillCursors:{ - int *x = (int *) m.data->_data; - x++; // reserved - int n = *x; - cout << "\tkillCursors n: " << n << endl; - break; - } - default: - cerr << "*** CANNOT HANDLE TYPE: " << m.data->operation() << endl; + } catch ( ... ) { + cerr << "Error parsing message for operation: " << m.operation() << endl; } - + + if ( !forwardAddress.empty() ) { - if ( m.data->operation() != mongo::opReply ) { + if ( m.operation() != mongo::opReply ) { boost::shared_ptr conn = forwarder[ c ]; if ( !conn ) { conn.reset(new DBClientConnection( true )); conn->connect( forwardAddress ); forwarder[ c ] = conn; } - if ( m.data->operation() == mongo::dbQuery || m.data->operation() == mongo::dbGetMore ) { - if ( m.data->operation() == mongo::dbGetMore ) { + if ( m.operation() == mongo::dbQuery || m.operation() == mongo::dbGetMore ) { + if ( m.operation() == mongo::dbGetMore ) { DbMessage d( m ); d.pullInt(); long long &cId = d.pullInt64(); @@ -322,8 +345,8 @@ } Message response; conn->port().call( m, response ); - QueryResult *qr = (QueryResult *) response.data; - if ( !( qr->resultFlags() & QueryResult::ResultFlag_CursorNotFound ) ) { + QueryResult *qr = (QueryResult *) response.singleData(); + if ( !( qr->resultFlags() & mongo::ResultFlag_CursorNotFound ) ) { if ( qr->cursorId != 0 ) { lastCursor[ c ] = qr->cursorId; return; @@ -336,9 +359,9 @@ } else { Connection r = c.reverse(); long long myCursor = lastCursor[ r ]; - QueryResult *qr = (QueryResult *) m.data; + QueryResult *qr = (QueryResult *) m.singleData(); long long yourCursor = qr->cursorId; - if ( ( qr->resultFlags() & QueryResult::ResultFlag_CursorNotFound ) ) + if ( ( qr->resultFlags() & mongo::ResultFlag_CursorNotFound ) ) yourCursor = 0; if ( myCursor && !yourCursor ) cerr << "Expected valid cursor in sniffed response, found none" << endl; @@ -366,7 +389,7 @@ long read = 0; while ( read < length ){ Message m(pos,false); - int len = m.data->len; + int len = m.header()->len; DbMessage d(m); cout << len << " " << d.getns() << endl; @@ -389,6 +412,9 @@ " or a file containing output from mongod's --diaglog option.\n" " If no source is specified, mongosniff will attempt to sniff\n" " from one of the machine's network interfaces.\n" + "--objcheck Log hex representation of invalid BSON objects and nothing\n" + " else. Spurious messages about invalid objects may result\n" + " when there are dropped tcp packets.\n" "... These parameters are used to filter sniffing. By default, \n" " only port 27017 is sniffed.\n" "--help Print this help message.\n" @@ -397,6 +423,9 @@ int main(int argc, char **argv){ + stringstream nullStream; + nullStream.clear(ios::failbit); + const char *dev = NULL; char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *handle; @@ -435,6 +464,10 @@ else dev = args[ ++i ]; } + else if ( arg == string( "--objcheck" ) ) { + objcheck = true; + outPtr = &nullStream; + } else { serverPorts.insert( atoi( args[ i ] ) ); } diff -Nru mongodb-1.4.4/tools/stat.cpp mongodb-1.6.3/tools/stat.cpp --- mongodb-1.4.4/tools/stat.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/stat.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,9 +16,10 @@ * along with this program. If not, see . 
*/ -#include "stdafx.h" +#include "pch.h" #include "client/dbclient.h" #include "db/json.h" +#include "../util/httpclient.h" #include "tool.h" @@ -38,6 +39,7 @@ _sleep = 1; _rowNum = 0; _showHeaders = true; + _http = false; add_hidden_options() ( "sleep" , po::value() , "time to sleep between calls" ) @@ -45,9 +47,12 @@ add_options() ("noheaders", "don't output column names") ("rowcount,n", po::value()->default_value(0), "number of stats lines to print (0 for indefinite)") + ("http", "use http instead of raw db connection") ; addPositionArg( "sleep" , 1 ); + + _autoreconnect = true; } virtual void printExtraHelp( ostream & out ){ @@ -55,7 +60,55 @@ out << "sleep time: time to wait (in seconds) between calls" << endl; } + virtual void printExtraHelpAfter( ostream & out ){ + out << "\n"; + out << " Fields\n"; + out << " inserts/s \t- # of inserts per second\n"; + out << " query/s \t- # of queries per second\n"; + out << " update/s \t- # of updates per second\n"; + out << " delete/s \t- # of deletes per second\n"; + out << " getmore/s \t- # of get mores (cursor batch) per second\n"; + out << " command/s \t- # of commands per second\n"; + out << " flushes/s \t- # of fsync flushes per second\n"; + out << " mapped \t- amount of data mmaped (total data size) megabytes\n"; + out << " visze \t- virtual size of process in megabytes\n"; + out << " res \t- resident size of process in megabytes\n"; + out << " faults/s \t- # of pages faults/sec (linux only)\n"; + out << " locked \t- percent of time in global write lock\n"; + out << " idx miss \t- percent of btree page misses (sampled)\n"; + out << " q t|r|w \t- ops waiting for lock from db.currentOp() (total|read|write)\n"; + out << " conn \t- number of open connections\n"; + } + + BSONObj stats(){ + if ( _http ){ + HttpClient c; + HttpClient::Result r; + + string url; + { + stringstream ss; + ss << "http://" << _host; + if ( _host.find( ":" ) == string::npos ) + ss << ":28017"; + ss << "/_status"; + url = ss.str(); + } + + if ( c.get( url , &r ) != 200 ){ + cout << "error (http): " << r.getEntireResponse() << endl; + return BSONObj(); + } + + BSONObj x = fromjson( r.getBody() ); + BSONElement e = x["serverStatus"]; + if ( e.type() != Object ){ + cout << "BROKEN: " << x << endl; + return BSONObj(); + } + return e.embeddedObjectUserCheck(); + } BSONObj out; if ( ! 
conn().simpleCommand( _db , &out , "serverStatus" ) ){ cout << "error: " << out << endl; @@ -77,7 +130,9 @@ double y = ( b.getFieldDotted( outof ).number() - a.getFieldDotted( outof ).number() ); if ( y == 0 ) return 0; - return x / y; + double p = x / y; + p = (double)((int)(p * 1000)) / 10; + return p; } void cellstart( stringstream& ss , string name , unsigned& width ){ @@ -101,12 +156,10 @@ } void cell( stringstream& ss , string name , unsigned width , const string& val ){ - assert( val.size() <= width ); cellstart( ss , name , width ); ss << setw(width) << val << " "; } - string doRow( const BSONObj& a , const BSONObj& b ){ stringstream ss; @@ -120,6 +173,13 @@ } } + if ( b["backgroundFlushing"].type() == Object ){ + BSONObj ax = a["backgroundFlushing"].embeddedObject(); + BSONObj bx = b["backgroundFlushing"].embeddedObject(); + BSONObjIterator i( bx ); + cell( ss , "flushes/s" , 6 , (int)diff( "flushes" , ax , bx ) ); + } + if ( b.getFieldDotted("mem.supported").trueValue() ){ BSONObj bx = b["mem"].embeddedObject(); BSONObjIterator i( bx ); @@ -127,10 +187,24 @@ cell( ss , "vsize" , 6 , bx["virtual"].numberInt() ); cell( ss , "res" , 6 , bx["resident"].numberInt() ); } + + if ( b["extra_info"].type() == Object ){ + BSONObj ax = a["extra_info"].embeddedObject(); + BSONObj bx = b["extra_info"].embeddedObject(); + if ( ax["page_faults"].type() || bx["page_faults"].type() ) + cell( ss , "faults/s" , 6 , (int)diff( "page_faults" , ax , bx ) ); + } - cell( ss , "% locked" , 8 , percent( "globalLock.totalTime" , "globalLock.lockTime" , a , b ) ); - cell( ss , "% idx miss" , 8 , percent( "indexCounters.btree.accesses" , "indexCounters.btree.misses" , a , b ) ); + cell( ss , "locked %" , 8 , percent( "globalLock.totalTime" , "globalLock.lockTime" , a , b ) ); + cell( ss , "idx miss %" , 8 , percent( "indexCounters.btree.accesses" , "indexCounters.btree.misses" , a , b ) ); + if ( b.getFieldDotted( "globalLock.currentQueue" ).type() == Object ){ + int r = b.getFieldDotted( "globalLock.currentQueue.readers" ).numberInt(); + int w = b.getFieldDotted( "globalLock.currentQueue.writers" ).numberInt(); + stringstream temp; + temp << r+w << "|" << r << "|" << w; + cell( ss , "q t|r|w" , 10 , temp.str() ); + } cell( ss , "conn" , 5 , b.getFieldDotted( "connections.current" ).numberInt() ); { @@ -142,7 +216,7 @@ << setfill('0') << setw(2) << t.tm_min << ":" << setfill('0') << setw(2) << t.tm_sec; - cell( ss , "time" , 8 , temp.str() ); + cell( ss , "time" , 10 , temp.str() ); } if ( _showHeaders && _rowNum % 20 == 0 ){ @@ -154,6 +228,13 @@ return ss.str(); } + virtual void preSetup(){ + if ( hasParam( "http" ) ){ + _http = true; + _noconnection = true; + } + } + int run(){ _sleep = getParam( "sleep" , _sleep ); if ( hasParam( "noheaders" ) ) { @@ -167,11 +248,26 @@ while ( _rowCount == 0 || _rowNum < _rowCount ){ sleepsecs(_sleep); - BSONObj now = stats(); + BSONObj now; + try { + now = stats(); + } + catch ( std::exception& e ){ + cout << "can't get data: " << e.what() << endl; + continue; + } + if ( now.isEmpty() ) return -2; - cout << doRow( prev , now ) << endl; + try { + cout << doRow( prev , now ) << endl; + } + catch ( AssertionException& e ){ + cout << "\nerror: " << e.what() << "\n" + << now + << endl; + } prev = now; } @@ -183,6 +279,7 @@ int _rowNum; int _rowCount; bool _showHeaders; + bool _http; }; } diff -Nru mongodb-1.4.4/tools/tool.cpp mongodb-1.6.3/tools/tool.cpp --- mongodb-1.4.4/tools/tool.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/tool.cpp 2010-09-24
10:02:42.000000000 -0700 @@ -24,6 +24,7 @@ #include #include "util/file_allocator.h" +#include "util/password.h" using namespace std; using namespace mongo; @@ -34,24 +35,28 @@ CmdLine cmdLine; - Tool::Tool( string name , bool localDBAllowed , string defaultDB , string defaultCollection ) : - _name( name ) , _db( defaultDB ) , _coll( defaultCollection ) , _conn(0), _paired(false) { - + Tool::Tool( string name , bool localDBAllowed , string defaultDB , + string defaultCollection , bool usesstdout ) : + _name( name ) , _db( defaultDB ) , _coll( defaultCollection ) , + _usesstdout(usesstdout), _noconnection(false), _autoreconnect(false), _conn(0), _paired(false) { + _options = new po::options_description( "options" ); _options->add_options() ("help","produce help message") ("verbose,v", "be more verbose (include multiple times for more verbosity e.g. -vvvvv)") ("host,h",po::value(), "mongo host to connect to (\"left,right\" for pairs)" ) + ("port",po::value(), "server port. Can also use --host hostname:port" ) ("db,d",po::value(), "database to use" ) ("collection,c",po::value(), "collection to use (some commands)" ) ("username,u",po::value(), "username" ) - ("password,p",po::value(), "password" ) + ("password,p", new PasswordValue( &_password ), "password" ) + ("ipv6", "enable IPv6 support (disabled by default)") ; if ( localDBAllowed ) _options->add_options() - ("dbpath",po::value(), "directly access mongod data " - "files in the given path, instead of connecting to a mongod " - "instance - needs to lock the data directory, so cannot be " + ("dbpath",po::value(), "directly access mongod database " + "files in the given path, instead of connecting to a mongod " + "server - needs to lock the data directory, so cannot be " "used if a mongod is currently accessing the same path" ) ("directoryperdb", "if dbpath specified, each db is in a separate directory" ) ; @@ -71,12 +76,10 @@ delete _conn; } - void Tool::printExtraHelp( ostream & out ){ - } - void Tool::printHelp(ostream &out) { printExtraHelp(out); _options->print(out); + printExtraHelpAfter(out); } int Tool::main( int argc , char ** argv ){ @@ -109,8 +112,18 @@ return EXIT_BADOPTIONS; } + // hide password from ps output + for (int i=0; i < (argc-1); ++i){ + if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--password")){ + char* arg = argv[i+1]; + while (*arg){ + *arg++ = 'x'; + } + } + } + if ( _params.count( "help" ) ){ - printHelp(cerr); + printHelp(cout); return 0; } @@ -123,37 +136,39 @@ logLevel = s.length(); } } + + preSetup(); bool useDirectClient = hasParam( "dbpath" ); - + if ( ! useDirectClient ) { _host = "127.0.0.1"; if ( _params.count( "host" ) ) _host = _params["host"].as(); - if ( _host.find( "," ) == string::npos ){ - DBClientConnection * c = new DBClientConnection(); - _conn = c; - - string errmsg; - if ( ! c->connect( _host , errmsg ) ){ - cerr << "couldn't connect to [" << _host << "] " << errmsg << endl; - return -1; - } + if ( _params.count( "port" ) ) + _host += ':' + _params["port"].as(); + + if ( _noconnection ){ + // do nothing } else { - log(1) << "using pairing" << endl; - DBClientPaired * c = new DBClientPaired(); - _paired = true; - _conn = c; + string errmsg; - if ( ! c->connect( _host ) ){ - cerr << "couldn't connect to paired server: " << _host << endl; + ConnectionString cs = ConnectionString::parse( _host , errmsg ); + if ( ! cs.isValid() ){ + cerr << "invalid hostname [" << _host << "] " << errmsg << endl; + return -1; + } + + _conn = cs.connect( errmsg ); + if ( ! 
_conn ){ + cerr << "couldn't connect to [" << _host << "] " << errmsg << endl; return -1; } } - - cerr << "connected to: " << _host << endl; + + (_usesstdout ? cout : cerr ) << "connected to: " << _host << endl; } else { if ( _params.count( "directoryperdb" ) ) { @@ -168,7 +183,7 @@ try { acquirePathLock(); } - catch ( DBException& e ){ + catch ( DBException& ){ cerr << endl << "If you are running a mongod on the same " "path you should connect to that instead of direct data " "file access" << endl << endl; @@ -188,8 +203,13 @@ if ( _params.count( "username" ) ) _username = _params["username"].as(); - if ( _params.count( "password" ) ) - _password = _params["password"].as(); + if ( _params.count( "password" ) + && ( _password.empty() ) ) { + _password = askPassword(); + } + + if (_params.count("ipv6")) + enableIPv6(); int ret = -1; try { @@ -209,14 +229,15 @@ } DBClientBase& Tool::conn( bool slaveIfPaired ){ - if ( _paired && slaveIfPaired ) - return ((DBClientPaired*)_conn)->slaveConn(); + // TODO: _paired is deprecated + if ( slaveIfPaired && _conn->type() == ConnectionString::SET ) + return ((DBClientReplicaSet*)_conn)->slaveConn(); return *_conn; } void Tool::addFieldOptions(){ add_options() - ("fields,f" , po::value() , "comma seperated list of field names e.g. -f name,age" ) + ("fields,f" , po::value() , "comma separated list of field names e.g. -f name,age" ) ("fieldFile" , po::value() , "file with fields names - 1 per line" ) ; } @@ -230,10 +251,10 @@ pcrecpp::StringPiece input(fields_arg); string f; - pcrecpp::RE re("([\\w\\.\\s]+),?" ); + pcrecpp::RE re("([#\\w\\.\\s\\-]+),?" ); while ( re.Consume( &input, &f ) ){ _fields.push_back( f ); - b.append( f.c_str() , 1 ); + b.append( f , 1 ); } _fieldsObj = b.obj(); @@ -254,7 +275,7 @@ file.getline( line , BUF_SIZE ); const char * cur = line; while ( isspace( cur[0] ) ) cur++; - if ( strlen( cur ) == 0 ) + if ( cur[0] == '\0' ) continue; _fields.push_back( cur ); @@ -286,6 +307,105 @@ throw UserException( 9997 , (string)"auth failed: " + errmsg ); } + BSONTool::BSONTool( const char * name , bool objcheck ) + : Tool( name , true , "" , "" ) , _objcheck( objcheck ){ + + add_options() + ("objcheck" , "validate object before inserting" ) + ("filter" , po::value() , "filter to apply before inserting" ) + ; + } + + + int BSONTool::run(){ + _objcheck = hasParam( "objcheck" ); + + if ( hasParam( "filter" ) ) + _matcher.reset( new Matcher( fromjson( getParam( "filter" ) ) ) ); + + return doRun(); + } + + long long BSONTool::processFile( const path& root ){ + string fileString = root.string(); + + long long fileLength = file_size( root ); + + if ( fileLength == 0 ) { + out() << "file " << fileString << " empty, skipping" << endl; + return 0; + } + + + FILE* file = fopen( fileString.c_str() , "rb" ); + if ( ! file ){ + log() << "error opening file: " << fileString << endl; + return 0; + } + +#if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL) + posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL); +#endif + + log(1) << "\t file size: " << fileLength << endl; + + long long read = 0; + long long num = 0; + long long processed = 0; + + const int BUF_SIZE = 1024 * 1024 * 5; + boost::scoped_array buf_holder(new char[BUF_SIZE]); + char * buf = buf_holder.get(); + + ProgressMeter m( fileLength ); + + while ( read < fileLength ) { + int readlen = fread(buf, 4, 1, file); + int size = ((int*)buf)[0]; + if ( size >= BUF_SIZE ){ + cerr << "got an object of size: " << size << " terminating..." 
<< endl; + } + uassert( 10264 , "invalid object size" , size < BUF_SIZE ); + + readlen = fread(buf+4, size-4, 1, file); + + BSONObj o( buf ); + if ( _objcheck && ! o.valid() ){ + cerr << "INVALID OBJECT - going try and pring out " << endl; + cerr << "size: " << size << endl; + BSONObjIterator i(o); + while ( i.more() ){ + BSONElement e = i.next(); + try { + e.validate(); + } + catch ( ... ){ + cerr << "\t\t NEXT ONE IS INVALID" << endl; + } + cerr << "\t name : " << e.fieldName() << " " << e.type() << endl; + cerr << "\t " << e << endl; + } + } + + if ( _matcher.get() == 0 || _matcher->matches( o ) ){ + gotObject( o ); + processed++; + } + + read += o.objsize(); + num++; + + m.hit( o.objsize() ); + } + + uassert( 10265 , "counts don't match" , m.done() == fileLength ); + out() << "\t " << m.hits() << " objects found" << endl; + if ( _matcher.get() ) + out() << "\t " << processed << " objects processed" << endl; + return processed; + } + + void setupSignals(){} } diff -Nru mongodb-1.4.4/tools/tool.h mongodb-1.6.3/tools/tool.h --- mongodb-1.4.4/tools/tool.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/tools/tool.h 2010-09-24 10:02:42.000000000 -0700 @@ -35,7 +35,8 @@ class Tool { public: - Tool( string name , bool localDBAllowed=true, string defaultDB="test" , string defaultCollection=""); + Tool( string name , bool localDBAllowed=true, string defaultDB="test" , + string defaultCollection="", bool usesstdout=true ); virtual ~Tool(); int main( int argc , char ** argv ); @@ -71,12 +72,15 @@ } return _db + "." + _coll; } + + virtual void preSetup(){} virtual int run() = 0; virtual void printHelp(ostream &out); - virtual void printExtraHelp( ostream & out ); + virtual void printExtraHelp( ostream & out ){} + virtual void printExtraHelpAfter( ostream & out ){} protected: @@ -90,6 +94,10 @@ string _username; string _password; + + bool _usesstdout; + bool _noconnection; + bool _autoreconnect; void addFieldOptions(); void needFields(); @@ -98,8 +106,10 @@ BSONObj _fieldsObj; - private: string _host; + + protected: + mongo::DBClientBase * _conn; bool _paired; @@ -111,4 +121,20 @@ }; + class BSONTool : public Tool { + bool _objcheck; + auto_ptr _matcher; + + public: + BSONTool( const char * name , bool objcheck = false ); + + virtual int doRun() = 0; + virtual void gotObject( const BSONObj& obj ) = 0; + + virtual int run(); + + long long processFile( const path& file ); + + }; + } diff -Nru mongodb-1.4.4/util/allocator.h mongodb-1.6.3/util/allocator.h --- mongodb-1.4.4/util/allocator.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/allocator.h 2010-09-24 10:02:42.000000000 -0700 @@ -31,7 +31,9 @@ return x; } -#define malloc mongo::ourmalloc -#define realloc mongo::ourrealloc +#define MONGO_malloc mongo::ourmalloc +#define malloc MONGO_malloc +#define MONGO_realloc mongo::ourrealloc +#define realloc MONGO_realloc } // namespace mongo diff -Nru mongodb-1.4.4/util/array.h mongodb-1.6.3/util/array.h --- mongodb-1.4.4/util/array.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/array.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,5 +1,21 @@ // array.h +/* + * Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + namespace mongo { template @@ -70,7 +86,7 @@ return _it->_data[_pos]; } - operator string() const { + string toString() const { stringstream ss; ss << _pos; return ss.str(); diff -Nru mongodb-1.4.4/util/assert_util.cpp mongodb-1.6.3/util/assert_util.cpp --- mongodb-1.4.4/util/assert_util.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/assert_util.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,10 +15,20 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "assert_util.h" #include "assert.h" #include "file.h" +#include +using namespace std; + +#ifndef _WIN32 +#include +#include +#endif + +//#include "../bson/bson.h" +#include "../db/jsobj.h" namespace mongo { @@ -41,6 +51,17 @@ if ( newvalue >= max ) rollover(); } + + void ExceptionInfo::append( BSONObjBuilder& b , const char * m , const char * c ) const { + if ( msg.empty() ) + b.append( m , "unknown assertion" ); + else + b.append( m , msg ); + + if ( code ) + b.append( c , code ); + } + string getDbContext(); @@ -63,8 +84,7 @@ lastAssert[0].set(msg, getDbContext().c_str(), file, line); stringstream temp; temp << "assertion " << file << ":" << line; - AssertionException e; - e.msg = temp.str(); + AssertionException e(temp.str(),0); breakpoint(); throw e; } @@ -74,13 +94,8 @@ raiseError(0,msg); } - int uacount = 0; void uasserted(int msgid, const char *msg) { assertionCount.condrollover( ++assertionCount.user ); - if ( ++uacount < 100 ) - log() << "User Exception " << msgid << ":" << msg << endl; - else - RARELY log() << "User Exception " << msg << endl; lastAssert[3].set(msg, getDbContext().c_str(), "", 0); raiseError(msgid,msg); throw UserException(msgid, msg); @@ -88,11 +103,19 @@ void msgasserted(int msgid, const char *msg) { assertionCount.condrollover( ++assertionCount.warning ); - log() << "Assertion: " << msgid << ":" << msg << endl; + tlog() << "Assertion: " << msgid << ":" << msg << endl; lastAssert[2].set(msg, getDbContext().c_str(), "", 0); raiseError(msgid,msg && *msg ? msg : "massert failure"); breakpoint(); - printStackTrace(); // TEMP?? should we get rid of this? TODO + printStackTrace(); + throw MsgAssertionException(msgid, msg); + } + + void msgassertedNoTrace(int msgid, const char *msg) { + assertionCount.condrollover( ++assertionCount.warning ); + log() << "Assertion: " << msgid << ":" << msg << endl; + lastAssert[2].set(msg, getDbContext().c_str(), "", 0); + raiseError(msgid,msg && *msg ? 
msg : "massert failure"); throw MsgAssertionException(msgid, msg); } @@ -100,12 +123,11 @@ stringstream ss; // errno might not work on all systems for streams // if it doesn't for a system should deal with here - ss << msg << " stream invalie: " << OUTPUT_ERRNO; + ss << msg << " stream invalid: " << errnoWithDescription(); throw UserException( code , ss.str() ); } - - mongo::mutex *Assertion::_mutex = new mongo::mutex(); + mongo::mutex *Assertion::_mutex = new mongo::mutex("Assertion"); string Assertion::toString() { if( _mutex == 0 ) @@ -125,89 +147,31 @@ return ss.str(); } - - class LoggingManager { - public: - LoggingManager() - : _enabled(0) , _file(0) { - } - - void start( const string& lp , bool append ){ - uassert( 10268 , "LoggingManager already started" , ! _enabled ); - _append = append; - - // test path - FILE * test = fopen( lp.c_str() , _append ? "a" : "w" ); - if ( ! test ){ - cout << "can't open [" << lp << "] for log file" << endl; - dbexit( EXIT_BADOPTIONS ); - assert( 0 ); - } - fclose( test ); - - _path = lp; - _enabled = 1; - rotate(); - } - - void rotate(){ - if ( ! _enabled ){ - cout << "LoggingManager not enabled" << endl; - return; - } + string errnoWithPrefix( const char * prefix ){ + stringstream ss; + if ( prefix ) + ss << prefix << ": "; + ss << errnoWithDescription(); + return ss.str(); + } - if ( _file ){ + + string demangleName( const type_info& typeinfo ){ #ifdef _WIN32 - cout << "log rotation doesn't work on windows" << endl; - return; + return typeinfo.name(); #else - struct tm t; - localtime_r( &_opened , &t ); - - stringstream ss; - ss << _path << "." << ( 1900 + t.tm_year ) << "-" << t.tm_mon << "-" << t.tm_mday - << "_" << t.tm_hour << "-" << t.tm_min << "-" << t.tm_sec; - string s = ss.str(); - rename( _path.c_str() , s.c_str() ); -#endif - } - - _file = freopen( _path.c_str() , _append ? "a" : "w" , stdout ); - if ( ! _file ){ - cerr << "can't open: " << _path.c_str() << " for log file" << endl; - dbexit( EXIT_BADOPTIONS ); - assert(0); - } - _opened = time(0); - } - - private: - - bool _enabled; - string _path; - bool _append; + int status; - FILE * _file; - time_t _opened; - - } loggingManager; - - void initLogging( const string& lp , bool append ){ - cout << "all output going to: " << lp << endl; - loggingManager.start( lp , append ); - } - - void rotateLogs( int signal ){ - loggingManager.rotate(); + char * niceName = abi::__cxa_demangle(typeinfo.name(), 0, 0, &status); + if ( ! 
niceName ) + return typeinfo.name(); + + string s = niceName; + free(niceName); + return s; +#endif } - string errnostring( const char * prefix ){ - stringstream ss; - if ( prefix ) - ss << prefix << ": "; - ss << OUTPUT_ERRNO; - return ss.str(); - } } diff -Nru mongodb-1.4.4/util/assert_util.h mongodb-1.6.3/util/assert_util.h --- mongodb-1.4.4/util/assert_util.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/assert_util.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,6 +22,11 @@ namespace mongo { + enum CommonErrorCodes { + DatabaseDifferCaseCode = 13297 , + StaleConfigInContextCode = 13388 + }; + /* these are manipulated outside of mutexes, so be careful */ struct Assertion { Assertion() { @@ -81,68 +86,84 @@ }; extern AssertionCount assertionCount; + + struct ExceptionInfo { + ExceptionInfo() : msg(""),code(-1){} + ExceptionInfo( const char * m , int c ) + : msg( m ) , code( c ){ + } + ExceptionInfo( const string& m , int c ) + : msg( m ) , code( c ){ + } + + void append( BSONObjBuilder& b , const char * m = "$err" , const char * c = "code" ) const ; + + string toString() const { stringstream ss; ss << "exception: " << code << " " << msg; return ss.str(); } + + bool empty() const { return msg.empty(); } + + + string msg; + int code; + }; class DBException : public std::exception { public: - virtual const char* what() const throw() = 0; + DBException( const ExceptionInfo& ei ) : _ei(ei){} + DBException( const char * msg , int code ) : _ei(msg,code){} + DBException( const string& msg , int code ) : _ei(msg,code){} + virtual ~DBException() throw() { } + + virtual const char* what() const throw(){ return _ei.msg.c_str(); } + virtual int getCode() const { return _ei.code; } + + virtual void appendPrefix( stringstream& ss ) const { } + virtual string toString() const { - return what(); + stringstream ss; ss << getCode() << " " << what(); return ss.str(); } - virtual int getCode() = 0; - operator string() const { return toString(); } + + const ExceptionInfo& getInfo() const { return _ei; } + + protected: + ExceptionInfo _ei; }; class AssertionException : public DBException { public: - int code; - string msg; - AssertionException() { code = 0; } + + AssertionException( const ExceptionInfo& ei ) : DBException(ei){} + AssertionException( const char * msg , int code ) : DBException(msg,code){} + AssertionException( const string& msg , int code ) : DBException(msg,code){} + virtual ~AssertionException() throw() { } - virtual bool severe() { - return true; - } - virtual bool isUserAssertion() { - return false; - } - virtual int getCode(){ return code; } - virtual const char* what() const throw() { return msg.c_str(); } + + virtual bool severe() { return true; } + virtual bool isUserAssertion() { return false; } /* true if an interrupted exception - see KillCurrentOp */ bool interrupted() { - return code == 11600 || code == 11601; + return _ei.code == 11600 || _ei.code == 11601; } }; - + /* UserExceptions are valid errors that a user can cause, like out of disk space or duplicate key */ class UserException : public AssertionException { public: - UserException(int c , const string& m) { - code = c; - msg = m; - } - virtual bool severe() { - return false; - } - virtual bool isUserAssertion() { - return true; - } - virtual string toString() const { - return "userassert:" + msg; - } - }; + UserException(int c , const string& m) : AssertionException( m , c ){} + virtual bool severe() { return false; } + virtual bool isUserAssertion() { return true; } + virtual void
appendPrefix( stringstream& ss ) const { ss << "userassert:"; } + }; + class MsgAssertionException : public AssertionException { public: - MsgAssertionException(int c, const char *m) { - code = c; - msg = m; - } - virtual bool severe() { - return false; - } - virtual string toString() const { - return "massert:" + msg; - } + MsgAssertionException( const ExceptionInfo& ei ) : AssertionException( ei ){} + MsgAssertionException(int c, const string& m) : AssertionException( m , c ){} + virtual bool severe() { return false; } + virtual void appendPrefix( stringstream& ss ) const { ss << "massert:"; } }; void asserted(const char *msg, const char *file, unsigned line); @@ -150,6 +171,7 @@ void uasserted(int msgid, const char *msg); inline void uasserted(int msgid , string msg) { uasserted(msgid, msg.c_str()); } void uassert_nothrow(const char *msg); // reported via lasterror, but don't throw exception + void msgassertedNoTrace(int msgid, const char *msg); void msgasserted(int msgid, const char *msg); inline void msgasserted(int msgid, string msg) { msgasserted(msgid, msg.c_str()); } @@ -157,59 +179,67 @@ #undef assert #endif -#define assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) +#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) +#define assert MONGO_assert /* "user assert". if asserts, user did something wrong, not our code */ -//#define uassert( 10269 , _Expression) (void)( (!!(_Expression)) || (uasserted(#_Expression, __FILE__, __LINE__), 0) ) -#define uassert(msgid, msg,_Expression) (void)( (!!(_Expression)) || (mongo::uasserted(msgid, msg), 0) ) - -#define xassert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) - -#define yassert 1 +#define MONGO_uassert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::uasserted(msgid, msg), 0) ) +#define uassert MONGO_uassert /* warning only - keeps going */ -#define wassert(_Expression) (void)( (!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) ) +#define MONGO_wassert(_Expression) (void)( (!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) ) +#define wassert MONGO_wassert /* display a message, no context, and throw assertionexception easy way to throw an exception and log something without our stack trace display happening. */ -#define massert(msgid, msg,_Expression) (void)( (!!(_Expression)) || (mongo::msgasserted(msgid, msg), 0) ) +#define MONGO_massert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::msgasserted(msgid, msg), 0) ) +#define massert MONGO_massert /* dassert is 'debug assert' -- might want to turn off for production as these could be slow. 
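/* [editor's note: illustrative sketch, not part of the diff] Typical use of the
   assertion macros above after the MONGO_-prefix rename (the short names remain
   as aliases). The error codes here are made up for illustration; real call
   sites each use a unique id. */
#include "util/assert_util.h"
#include <climits>
using namespace mongo;

int divideExact( int a , int b ){
    // caller error: throws UserException via uasserted()
    uassert( 20001 , "divisor must be nonzero" , b != 0 );
    // internal invariant: throws MsgAssertionException via msgasserted()
    massert( 20002 , "quotient would overflow" , ! ( a == INT_MIN && b == -1 ) );
    return a / b;
}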
*/ #if defined(_DEBUG) -#define dassert assert +# define MONGO_dassert assert #else -#define dassert(x) +# define MONGO_dassert(x) #endif +#define dassert MONGO_dassert // some special ids that we want to duplicate // > 10000 asserts // < 10000 UserException -#define ASSERT_ID_DUPKEY 11000 + enum { ASSERT_ID_DUPKEY = 11000 }; + /* throws a uassertion with an appropriate msg */ void streamNotGood( int code , string msg , std::ios& myios ); -#define ASSERT_STREAM_GOOD(msgid,msg,stream) (void)( (!!((stream).good())) || (mongo::streamNotGood(msgid, msg, stream), 0) ) + inline void assertStreamGood(unsigned msgid, string msg, std::ios& myios) { + if( !myios.good() ) streamNotGood(msgid, msg, myios); + } + + string demangleName( const type_info& typeinfo ); } // namespace mongo -#define BOOST_CHECK_EXCEPTION( expression ) \ +#define BOOST_CHECK_EXCEPTION MONGO_BOOST_CHECK_EXCEPTION +#define MONGO_BOOST_CHECK_EXCEPTION( expression ) \ try { \ expression; \ } catch ( const std::exception &e ) { \ - problem() << "caught boost exception: " << e.what() << endl; \ - assert( false ); \ + stringstream ss; \ + ss << "caught boost exception: " << e.what(); \ + msgasserted( 13294 , ss.str() ); \ } catch ( ... ) { \ massert( 10437 , "unknown boost failed" , false ); \ } -#define DESTRUCTOR_GUARD( expression ) \ +#define DESTRUCTOR_GUARD MONGO_DESTRUCTOR_GUARD +#define MONGO_DESTRUCTOR_GUARD( expression ) \ try { \ expression; \ } catch ( const std::exception &e ) { \ diff -Nru mongodb-1.4.4/util/atomic_int.h mongodb-1.6.3/util/atomic_int.h --- mongodb-1.4.4/util/atomic_int.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/atomic_int.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,100 +0,0 @@ -// atomic_int.h -// atomic wrapper for unsigned - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#if defined(_WIN32) -# include -#endif - -namespace mongo{ - - - struct AtomicUInt{ - AtomicUInt() : x(0) {} - AtomicUInt(unsigned z) : x(z) { } - volatile unsigned x; - operator unsigned() const { - return x; - } - inline AtomicUInt operator++(); // ++prefix - inline AtomicUInt operator++(int);// postfix++ - inline AtomicUInt operator--(); // --prefix - inline AtomicUInt operator--(int); // postfix-- - }; - -#if defined(_WIN32) - AtomicUInt AtomicUInt::operator++(){ - // InterlockedIncrement returns the new value - return InterlockedIncrement((volatile long*)&x); //long is 32bits in Win64 - } - AtomicUInt AtomicUInt::operator++(int){ - return InterlockedIncrement((volatile long*)&x)-1; - } - AtomicUInt AtomicUInt::operator--(){ - return InterlockedDecrement((volatile long*)&x); - } - AtomicUInt AtomicUInt::operator--(int){ - return InterlockedDecrement((volatile long*)&x)+1; - } -#elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) - // this is in GCC >= 4.1 - AtomicUInt AtomicUInt::operator++(){ - return __sync_add_and_fetch(&x, 1); - } - AtomicUInt AtomicUInt::operator++(int){ - return __sync_fetch_and_add(&x, 1); - } - AtomicUInt AtomicUInt::operator--(){ - return __sync_add_and_fetch(&x, -1); - } - AtomicUInt AtomicUInt::operator--(int){ - return __sync_fetch_and_add(&x, -1); - } -#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - // from boost 1.39 interprocess/detail/atomic.hpp - - inline unsigned atomic_int_helper(volatile unsigned *x, int val){ - int r; - asm volatile - ( - "lock\n\t" - "xadd %1, %0": - "+m"( *x ), "=r"( r ): // outputs (%0, %1) - "1"( val ): // inputs (%2 == %1) - "memory", "cc" // clobbers - ); - return r; - } - AtomicUInt AtomicUInt::operator++(){ - return atomic_int_helper(&x, 1)+1; - } - AtomicUInt AtomicUInt::operator++(int){ - return atomic_int_helper(&x, 1); - } - AtomicUInt AtomicUInt::operator--(){ - return atomic_int_helper(&x, -1)-1; - } - AtomicUInt AtomicUInt::operator--(int){ - return atomic_int_helper(&x, -1); - } -#else -# error "unsupported compiler or platform" -#endif - -} // namespace mongo diff -Nru mongodb-1.4.4/util/background.cpp mongodb-1.6.3/util/background.cpp --- mongodb-1.4.4/util/background.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/background.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,14 +15,15 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "goodies.h" #include "background.h" +#include namespace mongo { BackgroundJob *BackgroundJob::grab = 0; - mongo::mutex BackgroundJob::mutex; + mongo::mutex BackgroundJob::mutex("BackgroundJob"); /* static */ void BackgroundJob::thr() { @@ -31,9 +32,25 @@ assert( us->state == NotStarted ); us->state = Running; grab = 0; - us->run(); + + { + string nm = us->name(); + setThreadName(nm.c_str()); + } + + try { + us->run(); + } + catch ( std::exception& e ){ + log( LL_ERROR ) << "backgroundjob error: " << e.what() << endl; + } + catch(...) 
{ + log( LL_ERROR ) << "uncaught exception in BackgroundJob" << endl; + } us->state = Done; - if ( us->deleteSelf ) + bool delSelf = us->deleteSelf; + us->ending(); + if( delSelf ) delete us; } @@ -47,18 +64,55 @@ return *this; } - bool BackgroundJob::wait(int msMax) { - assert( state != NotStarted ); - int ms = 1; + bool BackgroundJob::wait(int msMax, unsigned maxsleep) { + unsigned ms = 1; Date_t start = jsTime(); while ( state != Done ) { sleepmillis(ms); - if ( ms < 1000 ) - ms = ms * 2; + if( ms*2<maxsleep ) + ms*=2; + if ( msMax && ( int( jsTime() - start ) > msMax) ) return false; } return true; } + void BackgroundJob::go(list<BackgroundJob*>& L) { + for( list<BackgroundJob*>::iterator i = L.begin(); i != L.end(); i++ ) + (*i)->go(); + } + + /* wait for several jobs to finish. */ + void BackgroundJob::wait(list<BackgroundJob*>& L, unsigned maxsleep) { + unsigned ms = 1; + { + x: + sleepmillis(ms); + if( ms*2<maxsleep ) ms*=2; + for( list<BackgroundJob*>::iterator i = L.begin(); i != L.end(); i++ ) { + assert( (*i)->state != NotStarted ); + if( (*i)->state != Done ) + goto x; + } + } + } + + void PeriodicBackgroundJob::run(){ + // want to handle first one differently so inShutdown is obeyed nicely + sleepmillis( _millis ); + + while ( ! inShutdown() ){ + try { + runLoop(); + } + catch ( std::exception& e ){ + log( LL_ERROR ) << "PeriodicBackgroundJob [" << name() << "] error: " << e.what() << endl; + } + catch ( ... ){ + log( LL_ERROR ) << "PeriodicBackgroundJob [" << name() << "] unknown error" << endl; + } + + sleepmillis( _millis ); + } + } + } // namespace mongo diff -Nru mongodb-1.4.4/util/background.h mongodb-1.6.3/util/background.h --- mongodb-1.4.4/util/background.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/background.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,31 +19,47 @@ namespace mongo { - /* object-orienty background thread dispatching. + /** object-oriented background thread dispatching. subclass and define run() - It is ok to call go() more than once -- if the previous invocation - has finished. Thus one pattern of use is to embed a backgroundjob - in your object and reuse it (or same thing with inheritance). + It is ok to call go(), that is, run the job, more than once -- if the + previous invocation has finished. Thus one pattern of use is to embed + a backgroundjob in your object and reuse it (or same thing with + inheritance). Each go() call spawns a new thread. + + note when job destructs, the thread is not terminated if still running. + generally if the thread could still be running, allocate the job dynamically + and set deleteSelf to true. */ - class BackgroundJob { + /* example + class ConnectBG : public BackgroundJob { + public: + int sock; + int res; + SockAddr farEnd; + void run() { + res = ::connect(sock, farEnd.raw(), farEnd.addressSize); + } + }; + */ + class BackgroundJob : boost::noncopyable { protected: - /* define this to do your work! */ + /** define this to do your work. + after this returns, state is set to done. + after this returns, deleted if deleteSelf true. + */ virtual void run() = 0; - + virtual string name() = 0; + virtual void ending() { } // hook for post processing if desired after everything else done. not called when deleteSelf=true public: enum State { NotStarted, Running, Done }; - State getState() const { - return state; - } - bool running() const { - return state == Running; - } + State getState() const { return state; } + bool running() const { return state == Running; } bool deleteSelf; // delete self when Done? @@ -53,14 +69,20 @@ } virtual ~BackgroundJob() { } - // start job. returns before it's finished. + // starts job.
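/* [editor's note: illustrative sketch, not part of the diff] Using BackgroundJob
   per the class comment above: a stack-allocated job (deleteSelf left false)
   dispatched with go() and reaped with wait(). The job type is hypothetical. */
#include "util/background.h"
using namespace mongo;

class CacheFlusher : public BackgroundJob {
    virtual string name() { return "CacheFlusher"; }  // 1.6 requires a name; thr() uses it as the thread name
    virtual void run() { /* the actual work */ }
};

void flushInBackground(){
    CacheFlusher j;
    j.go();    // spawns a thread; returns once dispatched
    j.wait();  // spin/sleeps until state == Done
}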
returns once it is "dispatched" BackgroundJob& go(); // wait for completion. this spins with sleep() so not terribly efficient. // returns true if did not time out. // // note you can call wait() more than once if the first call times out. - bool wait(int msMax = 0); + bool wait(int msMax = 0, unsigned maxSleepInterval=1000); + + /* start several */ + static void go(list&); + + /* wait for several jobs to finish. */ + static void wait(list&, unsigned maxSleepInterval=1000); private: static BackgroundJob *grab; @@ -69,4 +91,23 @@ volatile State state; }; + class PeriodicBackgroundJob : public BackgroundJob { + public: + PeriodicBackgroundJob( int millisToSleep ) + : _millis( millisToSleep ){ + } + + virtual ~PeriodicBackgroundJob(){} + + /** this gets called every millisToSleep ms */ + virtual void runLoop() = 0; + + virtual void run(); + + + private: + int _millis; + + }; + } // namespace mongo diff -Nru mongodb-1.4.4/util/base64.cpp mongodb-1.6.3/util/base64.cpp --- mongodb-1.4.4/util/base64.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/base64.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -16,7 +16,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "base64.h" namespace mongo { diff -Nru mongodb-1.4.4/util/builder.h mongodb-1.6.3/util/builder.h --- mongodb-1.4.4/util/builder.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/builder.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,211 +0,0 @@ -/* builder.h */ - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "../stdafx.h" -#include - -namespace mongo { - - class StringBuilder; - - class BufBuilder { - public: - BufBuilder(int initsize = 512) : size(initsize) { - if ( size > 0 ) { - data = (char *) malloc(size); - assert(data); - } else { - data = 0; - } - l = 0; - } - ~BufBuilder() { - kill(); - } - - void kill() { - if ( data ) { - free(data); - data = 0; - } - } - - void reset( int maxSize = 0 ){ - l = 0; - if ( maxSize && size > maxSize ){ - free(data); - data = (char*)malloc(maxSize); - size = maxSize; - } - - } - - /* leave room for some stuff later */ - void skip(int n) { - grow(n); - } - - /* note this may be deallocated (realloced) if you keep writing. 
*/ - char* buf() { - return data; - } - - /* assume ownership of the buffer - you must then free it */ - void decouple() { - data = 0; - } - - template void append(T j) { - *((T*)grow(sizeof(T))) = j; - } - void append(short j) { - append(j); - } - void append(int j) { - append(j); - } - void append(unsigned j) { - append(j); - } - void append(bool j) { - append(j); - } - void append(double j) { - append(j); - } - - void append(const void *src, size_t len) { - memcpy(grow(len), src, len); - } - - void append(const char *str) { - append((void*) str, strlen(str)+1); - } - - void append(const string &str) { - append( (void *)str.c_str(), str.length() + 1 ); - } - - void append( int val , int padding ){ - - } - - int len() const { - return l; - } - - void setlen( int newLen ){ - l = newLen; - } - - private: - /* returns the pre-grow write position */ - char* grow(int by) { - int oldlen = l; - l += by; - if ( l > size ) { - int a = size * 2; - if ( a == 0 ) - a = 512; - if ( l > a ) - a = l + 16 * 1024; - assert( a < 64 * 1024 * 1024 ); - data = (char *) realloc(data, a); - size= a; - } - return data + oldlen; - } - - char *data; - int l; - int size; - - friend class StringBuilder; - }; - - class StringBuilder { - public: - StringBuilder( int initsize=256 ) - : _buf( initsize ){ - } - -#define SBNUM(val,maxSize,macro) \ - int prev = _buf.l; \ - int z = sprintf( _buf.grow(maxSize) , macro , (val) ); \ - _buf.l = prev + z; \ - return *this; - - - StringBuilder& operator<<( double x ){ - SBNUM( x , 25 , "%g" ); - } - StringBuilder& operator<<( int x ){ - SBNUM( x , 11 , "%d" ); - } - StringBuilder& operator<<( unsigned x ){ - SBNUM( x , 11 , "%u" ); - } - StringBuilder& operator<<( long x ){ - SBNUM( x , 22 , "%ld" ); - } - StringBuilder& operator<<( unsigned long x ){ - SBNUM( x , 22 , "%lu" ); - } - StringBuilder& operator<<( long long x ){ - SBNUM( x , 22 , "%lld" ); - } - StringBuilder& operator<<( unsigned long long x ){ - SBNUM( x , 22 , "%llu" ); - } - StringBuilder& operator<<( short x ){ - SBNUM( x , 8 , "%hd" ); - } - StringBuilder& operator<<( char c ){ - _buf.grow( 1 )[0] = c; - return *this; - } - - void append( const char * str ){ - int x = strlen( str ); - memcpy( _buf.grow( x ) , str , x ); - } - StringBuilder& operator<<( const char * str ){ - append( str ); - return *this; - } - StringBuilder& operator<<( const string& s ){ - append( s.c_str() ); - return *this; - } - - // access - - void reset( int maxSize = 0 ){ - _buf.reset( maxSize ); - } - - string str(){ - return string(_buf.data, _buf.l); - } - - private: - BufBuilder _buf; - }; - -} // namespace mongo diff -Nru mongodb-1.4.4/util/concurrency/list.h mongodb-1.6.3/util/concurrency/list.h --- mongodb-1.4.4/util/concurrency/list.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/list.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,81 @@ +// list.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
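/* [editor's note: illustrative sketch, not part of the diff] builder.h is deleted
   here but not gone: it moved to bson/util/builder.h, matching the include change
   in the sniffer.cpp hunk earlier in this diff. Usage is unchanged; StringBuilder
   formats via sprintf into a growable BufBuilder. */
#include "bson/util/builder.h"
#include <iostream>
using namespace mongo;

int main(){
    StringBuilder sb;
    sb << "imported " << 42 << " objects in " << 1.5 << " seconds";
    std::cout << sb.str() << std::endl;  // imported 42 objects in 1.5 seconds
    return 0;
}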
+*/ + +#pragma once + +namespace mongo { + +/* this class uses a mutex for writes, but not for reads. + we can get fancier later... + + struct Member : public List1<Member>::Base { + const char *host; + int port; + }; + List1<Member> _members; + _members.head()->next(); + +*/ +template<typename T> +class List1 : boost::noncopyable { +public: + /* next() and head() return 0 at end of list */ + + List1() : _head(0), _m("List1"), _orphans(0) { } + + class Base { + friend class List1; + T *_next; + public: + T* next() const { return _next; } + }; + + T* head() const { return _head; } + + void push(T* t) { + scoped_lock lk(_m); + t->_next = _head; + _head = t; + } + + // intentionally leak. + void orphanAll() { + _head = 0; + } + + /* t is not deleted, but is removed from the list. (orphaned) */ + void orphan(T* t) { + scoped_lock lk(_m); + T *&prev = _head; + T *n = prev; + while( n != t ) { + prev = n->_next; + n = prev; + } + prev = t->_next; + if( ++_orphans > 500 ) + log() << "warning orphans=" << _orphans << '\n'; + } + +private: + T *_head; + mutex _m; + int _orphans; +}; + +}; diff -Nru mongodb-1.4.4/util/concurrency/msg.h mongodb-1.6.3/util/concurrency/msg.h --- mongodb-1.4.4/util/concurrency/msg.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/msg.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,61 @@ +// @file msg.h - interthread message passing + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <deque> +#include "task.h" + +namespace mongo { + + namespace task { + + typedef boost::function<void()> lam; + + /** typical usage is: task::fork( new Server("threadname") ); */ + class Server : public Task { + public: + /** send a message to the port */ + void send(lam); + + Server(string name) : _name(name), rq(false) { } + virtual ~Server() { } + + /** send message but block until function completes */ + void call(const lam&); + + void requeue() { rq = true; } + + protected: + /* REMINDER : for use in mongod, you will want to have this call Client::initThread(). */ + virtual void starting() { } + + private: + virtual bool initClient() { return true; } + virtual string name() { return _name; } + void doWork(); + deque<lam> d; + boost::mutex m; + boost::condition c; + string _name; + bool rq; + }; + + } + +} diff -Nru mongodb-1.4.4/util/concurrency/mutex.h mongodb-1.6.3/util/concurrency/mutex.h --- mongodb-1.4.4/util/concurrency/mutex.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/mutex.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,179 @@ +// @file mutex.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <map> +#include <set> + +namespace mongo { + + extern bool __destroyingStatics; + class mutex; + + // only used on _DEBUG builds: + class MutexDebugger { + typedef const char * mid; // mid = mutex ID + typedef map<mid,int> Preceeding; + map< mid, int > maxNest; + boost::thread_specific_ptr< Preceeding > us; + map< mid, set<mid> > followers; + boost::mutex &x; + unsigned magic; + public: + // set these to create an assert that + // b must never be locked before a + // so + // a.lock(); b.lock(); is fine + // b.lock(); alone is fine too + // only checked on _DEBUG builds. + string a,b; + + void aBreakPoint(){} + void programEnding(); + MutexDebugger(); + void entering(mid m) { + if( magic != 0x12345678 ) return; + + Preceeding *_preceeding = us.get(); + if( _preceeding == 0 ) + us.reset( _preceeding = new Preceeding() ); + Preceeding &preceeding = *_preceeding; + + if( a == m ) { + aBreakPoint(); + if( preceeding[b.c_str()] ) { + cout << "mutex problem " << b << " was locked before " << a << endl; + assert(false); + } + } + + preceeding[m]++; + if( preceeding[m] > 1 ) { + // recursive re-locking. + if( preceeding[m] > maxNest[m] ) + maxNest[m] = preceeding[m]; + return; + } + + bool failed = false; + string err; + { + boost::mutex::scoped_lock lk(x); + followers[m]; + for( Preceeding::iterator i = preceeding.begin(); i != preceeding.end(); i++ ) { + if( m != i->first && i->second > 0 ) { + followers[i->first].insert(m); + if( followers[m].count(i->first) != 0 ){ + failed = true; + stringstream ss; + mid bad = i->first; + ss << "mutex problem" << "\n when locking " << m << "\n " << bad << " was already locked and should not be." "\n set a and b above to debug.\n"; + stringstream q; + for( Preceeding::iterator i = preceeding.begin(); i != preceeding.end(); i++ ) { + if( i->first != m && i->first != bad && i->second > 0 ) + q << " " << i->first << '\n'; + } + string also = q.str(); + if( !also.empty() ) + ss << "also locked before " << m << " in this thread (no particular order):\n" << also; + err = ss.str(); + break; + } + } + } + } + if( failed ) { + cout << err << endl; + assert( 0 ); + } + } + void leaving(mid m) { + if( magic != 0x12345678 ) return; + Preceeding& preceeding = *us.get(); + preceeding[m]--; + if( preceeding[m] < 0 ) { + cout << "ERROR: lock count for " << m << " is " << preceeding[m] << endl; + assert( preceeding[m] >= 0 ); + } + } + }; + extern MutexDebugger &mutexDebugger; + + // If you create a local static instance of this class, that instance will be destroyed + // before all global static objects are destroyed, so __destroyingStatics will be set + // to true before the global static variables are destroyed. + class StaticObserver : boost::noncopyable { + public: + ~StaticObserver() { __destroyingStatics = true; } + }; + + // On pthread systems, it is an error to destroy a mutex while held. Static global + // mutexes may be held upon shutdown in our implementation, and this way we avoid + // destroying them.
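/* [editor's note: illustrative sketch, not part of the diff] What MutexDebugger
   above checks on _DEBUG builds: every thread must acquire any pair of mutexes in
   a consistent order. The mutex names are hypothetical. */
#include "util/concurrency/mutex.h"
using namespace mongo;

mongo::mutex ma("ma");
mongo::mutex mb("mb");

void threadOne(){
    scoped_lock la(ma);      // order recorded: ma, then mb
    scoped_lock lb(mb);
}
void threadTwo(){
    scoped_lock lb(mb);      // mb alone is fine...
    // scoped_lock la(ma);   // ...but adding this records the reverse order, and
    // MutexDebugger::entering() would then assert on the next ma-then-mb locking.
}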
+ class mutex : boost::noncopyable { + public: +#if defined(_DEBUG) + const char *_name; +#endif + +#if defined(_DEBUG) + mutex(const char *name) + : _name(name) +#else + mutex(const char *) +#endif + { + _m = new boost::mutex(); + } + ~mutex() { + if( !__destroyingStatics ) { + delete _m; + } + } + class scoped_lock : boost::noncopyable { +#if defined(_DEBUG) + mongo::mutex *mut; +#endif + public: + scoped_lock( mongo::mutex &m ) : _l( m.boost() ) { +#if defined(_DEBUG) + mut = &m; + mutexDebugger.entering(mut->_name); +#endif + } + ~scoped_lock() { +#if defined(_DEBUG) + mutexDebugger.leaving(mut->_name); +#endif + } + boost::mutex::scoped_lock &boost() { return _l; } + private: + boost::mutex::scoped_lock _l; + }; + private: + boost::mutex &boost() { return *_m; } + boost::mutex *_m; + }; + + typedef mutex::scoped_lock scoped_lock; + typedef boost::recursive_mutex::scoped_lock recursive_scoped_lock; + +} diff -Nru mongodb-1.4.4/util/concurrency/mvar.h mongodb-1.6.3/util/concurrency/mvar.h --- mongodb-1.4.4/util/concurrency/mvar.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/mvar.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,116 @@ +// mvar.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace mongo { + + /* This is based on haskell's MVar synchronization primitive: + * http://www.haskell.org/ghc/docs/latest/html/libraries/base-4.2.0.0/Control-Concurrent-MVar.html + * + * It is a thread-safe queue that can hold at most one object. + * You can also think of it as a box that can be either full or empty. 
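 *
 * Usage sketch (illustrative only, not from the patch; assumes each function
 * runs on its own boost::thread): one producer hands ints to one consumer,
 * blocking whenever the box is full or empty.
 *
 *     MVar<int> box;                          // starts EMPTY
 *
 *     void producer() {
 *         for( int i = 0; i < 10; i++ )
 *             box.put(i);                     // blocks while box is FULL
 *     }
 *     void consumer() {
 *         for( int i = 0; i < 10; i++ )
 *             cout << box.take() << endl;     // blocks while box is EMPTY
 *     }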
+ */ + + template + class MVar { + public: + enum State {EMPTY=0, FULL}; + + // create an empty MVar + MVar() + : _state(EMPTY) + {} + + // creates a full MVar + MVar(const T& val) + : _state(FULL) + , _value(val) + {} + + // puts val into the MVar and returns true or returns false if full + // never blocks + bool tryPut(const T& val){ + // intentionally repeat test before and after lock + if (_state == FULL) return false; + Mutex::scoped_lock lock(_mutex); + if (_state == FULL) return false; + + _state = FULL; + _value = val; + + // unblock threads waiting to 'take' + _condition.notify_all(); + + return true; + } + + // puts val into the MVar + // will block if the MVar is already full + void put(const T& val){ + Mutex::scoped_lock lock(_mutex); + while (!tryPut(val)){ + // unlocks lock while waiting and relocks before returning + _condition.wait(lock); + } + } + + // takes val out of the MVar and returns true or returns false if empty + // never blocks + bool tryTake(T& out){ + // intentionally repeat test before and after lock + if (_state == EMPTY) return false; + Mutex::scoped_lock lock(_mutex); + if (_state == EMPTY) return false; + + _state = EMPTY; + out = _value; + + // unblock threads waiting to 'put' + _condition.notify_all(); + + return true; + } + + // takes val out of the MVar + // will block if the MVar is empty + T take(){ + T ret = T(); + + Mutex::scoped_lock lock(_mutex); + while (!tryTake(ret)){ + // unlocks lock while waiting and relocks before returning + _condition.wait(lock); + } + + return ret; + } + + + // Note: this is fast because there is no locking, but state could + // change before you get a chance to act on it. + // Mainly useful for sanity checks / asserts. + State getState(){ return _state; } + + + private: + State _state; + T _value; + typedef boost::recursive_mutex Mutex; + Mutex _mutex; + boost::condition _condition; + }; + +} diff -Nru mongodb-1.4.4/util/concurrency/readme.txt mongodb-1.6.3/util/concurrency/readme.txt --- mongodb-1.4.4/util/concurrency/readme.txt 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/readme.txt 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,15 @@ +util/concurrency/ files + +list.h - a list class that is lock-free for reads +rwlock.h - read/write locks (RWLock) +msg.h - message passing between threads +task.h - an abstraction around threads +mutex.h - small enhancements that wrap boost::mutex +thread_pool.h +mvar.h + This is based on haskell's MVar synchronization primitive: + http://www.haskell.org/ghc/docs/latest/html/libraries/base-4.2.0.0/Control-Concurrent-MVar.html + It is a thread-safe queue that can hold at most one object. + You can also think of it as a box that can be either full or empty. +value.h + Atomic wrapper for values/objects that are copy constructable / assignable diff -Nru mongodb-1.4.4/util/concurrency/rwlock.h mongodb-1.6.3/util/concurrency/rwlock.h --- mongodb-1.4.4/util/concurrency/rwlock.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/rwlock.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,220 @@ +// rwlock.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "mutex.h" + +#if BOOST_VERSION >= 103500 + #define BOOST_RWLOCK +#else + + #if defined(_WIN32) + #error need boost >= 1.35 for windows + #endif + + #include + +#endif + +#ifdef BOOST_RWLOCK +#include +#undef assert +#define assert MONGO_assert +#endif + +namespace mongo { + +#ifdef BOOST_RWLOCK + class RWLock { + boost::shared_mutex _m; + public: +#if defined(_DEBUG) + const char *_name; + RWLock(const char *name) : _name(name) { } +#else + RWLock(const char *) { } +#endif + void lock(){ + _m.lock(); +#if defined(_DEBUG) + mutexDebugger.entering(_name); +#endif + } + void unlock(){ +#if defined(_DEBUG) + mutexDebugger.leaving(_name); +#endif + _m.unlock(); + } + + void lock_shared(){ + _m.lock_shared(); + } + + void unlock_shared(){ + _m.unlock_shared(); + } + + bool lock_shared_try( int millis ){ + boost::system_time until = get_system_time(); + until += boost::posix_time::milliseconds(millis); + if( _m.timed_lock_shared( until ) ) { + return true; + } + return false; + } + + bool lock_try( int millis = 0 ){ + boost::system_time until = get_system_time(); + until += boost::posix_time::milliseconds(millis); + if( _m.timed_lock( until ) ) { +#if defined(_DEBUG) + mutexDebugger.entering(_name); +#endif + return true; + } + return false; + } + + + }; +#else + class RWLock { + pthread_rwlock_t _lock; + + inline void check( int x ){ + if( x == 0 ) + return; + log() << "pthread rwlock failed: " << x << endl; + assert( x == 0 ); + } + + public: +#if defined(_DEBUG) + const char *_name; + RWLock(const char *name) : _name(name) { +#else + RWLock(const char *) { +#endif + check( pthread_rwlock_init( &_lock , 0 ) ); + } + + ~RWLock(){ + if ( ! __destroyingStatics ){ + check( pthread_rwlock_destroy( &_lock ) ); + } + } + + void lock(){ + check( pthread_rwlock_wrlock( &_lock ) ); +#if defined(_DEBUG) + mutexDebugger.entering(_name); +#endif + } + void unlock(){ +#if defined(_DEBUG) + mutexDebugger.leaving(_name); +#endif + check( pthread_rwlock_unlock( &_lock ) ); + } + + void lock_shared(){ + check( pthread_rwlock_rdlock( &_lock ) ); + } + + void unlock_shared(){ + check( pthread_rwlock_unlock( &_lock ) ); + } + + bool lock_shared_try( int millis ){ + return _try( millis , false ); + } + + bool lock_try( int millis = 0 ){ + if( _try( millis , true ) ) { +#if defined(_DEBUG) + mutexDebugger.entering(_name); +#endif + return true; + } + return false; + } + + bool _try( int millis , bool write ){ + while ( true ) { + int x = write ? + pthread_rwlock_trywrlock( &_lock ) : + pthread_rwlock_tryrdlock( &_lock ); + + if ( x <= 0 ) { + return true; + } + + if ( millis-- <= 0 ) + return false; + + if ( x == EBUSY ){ + sleepmillis(1); + continue; + } + check(x); + } + + return false; + } + + }; + + +#endif + + class rwlock_try_write { + RWLock& _l; + public: + struct exception { }; + rwlock_try_write(RWLock& l, int millis = 0) : _l(l) { + if( !l.lock_try(millis) ) throw exception(); + } + ~rwlock_try_write() { _l.unlock(); } + }; + + /* scoped lock */ + struct rwlock { + rwlock( const RWLock& lock , bool write , bool alreadyHaveLock = false ) + : _lock( (RWLock&)lock ) , _write( write ){ + + if ( ! 
alreadyHaveLock ){ + if ( _write ) + _lock.lock(); + else + _lock.lock_shared(); + } + } + + ~rwlock(){ + if ( _write ) + _lock.unlock(); + else + _lock.unlock_shared(); + } + + RWLock& _lock; + bool _write; + }; +} diff -Nru mongodb-1.4.4/util/concurrency/spin_lock.cpp mongodb-1.6.3/util/concurrency/spin_lock.cpp --- mongodb-1.4.4/util/concurrency/spin_lock.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/spin_lock.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,66 @@ +// spin_lock.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include +#include "spin_lock.h" + +namespace mongo { + + SpinLock::SpinLock() : _locked( false ){} + + SpinLock::~SpinLock(){} + + void SpinLock::lock(){ +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + // fast path + if (!_locked && !__sync_lock_test_and_set(&_locked, true)) { + return; + } + + // wait for lock + int wait = 1000; + while ((wait-- > 0) && (_locked)) {} + + // if failed to grab lock, sleep + struct timespec t; + t.tv_sec = 0; + t.tv_nsec = 5000000; + while (__sync_lock_test_and_set(&_locked, true)) { + nanosleep(&t, NULL); + } +#else + + // WARNING "TODO Missing spin lock in this platform." + +#endif + } + + void SpinLock::unlock(){ +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + __sync_lock_release(&_locked); + +#else + + // WARNING "TODO Missing spin lock in this platform." + +#endif + } + +} // namespace mongo diff -Nru mongodb-1.4.4/util/concurrency/spin_lock.h mongodb-1.6.3/util/concurrency/spin_lock.h --- mongodb-1.4.4/util/concurrency/spin_lock.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/spin_lock.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,48 @@ +// spin_lock.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef CONCURRENCY_SPINLOCK_HEADER +#define CONCURRENCY_SPINLOCK_HEADER + +namespace mongo { + + /** + * BIG WARNING - COMPILES, BUT NOT READY FOR USE - BIG WARNING + * + * The spinlock currently requires late GCC support + * routines. Support for other platforms will be added soon. 
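 *
 * Intended usage once enabled (editor's sketch; _vec is a hypothetical shared
 * container). Note there is no RAII guard in this version and the lock is not
 * recursive, so unlock() must be reached on every code path:
 *
 *     SpinLock _lk;
 *     void append(int x) {
 *         _lk.lock();          // spins ~1000 iterations, then naps 5ms at a time
 *         _vec.push_back(x);
 *         _lk.unlock();
 *     }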
+ */ + class SpinLock{ + public: + SpinLock(); + ~SpinLock(); + + void lock(); + void unlock(); + + private: + bool _locked; + + // Non-copyable, non-assignable + SpinLock(SpinLock&); + SpinLock& operator=(SpinLock&); + }; + +} // namespace mongo + +#endif // CONCURRENCY_SPINLOCK_HEADER diff -Nru mongodb-1.4.4/util/concurrency/task.cpp mongodb-1.6.3/util/concurrency/task.cpp --- mongodb-1.4.4/util/concurrency/task.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/task.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,176 @@ +// @file task.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "task.h" +#include "../goodies.h" +#include "../unittest.h" +#include "boost/thread/condition.hpp" + +namespace mongo { + + namespace task { + + /*void foo() { + boost::mutex m; + boost::mutex::scoped_lock lk(m); + boost::condition cond; + cond.wait(lk); + cond.notify_one(); + }*/ + + Task::Task() { + n = 0; + repeat = 0; + deleteSelf = true; + } + + void Task::halt() { repeat = 0; } + + void Task::run() { + assert( n == 0 ); + while( 1 ) { + n++; + try { + doWork(); + } + catch(...) { } + if( repeat == 0 ) + break; + sleepmillis(repeat); + if( inShutdown() ) + break; + } + } + + void Task::begin() { + go(); + } + + void fork(Task *t) { + t->begin(); + } + + void repeat(Task *t, unsigned millis) { + t->repeat = millis; + t->begin(); + } + + } +} + +#include "msg.h" + +/* task::Server */ + +namespace mongo { + namespace task { + + /* to get back a return value */ + struct Ret { + Ret() : done(false) { } + bool done; + boost::mutex m; + boost::condition c; + const lam *msg; + void f() { + (*msg)(); + done = true; + c.notify_one(); + } + }; + + void Server::call( const lam& msg ) { + Ret r; + r.msg = &msg; + lam f = boost::bind(&Ret::f, &r); + send(f); + { + boost::mutex::scoped_lock lk(r.m); + while( !r.done ) + r.c.wait(lk); + } + } + + void Server::send( lam msg ) { + { + boost::mutex::scoped_lock lk(m); + d.push_back(msg); + } + c.notify_one(); + } + + void Server::doWork() { + starting(); + while( 1 ) { + lam f; + try { + boost::mutex::scoped_lock lk(m); + while( d.empty() ) + c.wait(lk); + f = d.front(); + d.pop_front(); + } + catch(...) { + log() << "ERROR exception in Server:doWork?" << endl; + } + try { + f(); + if( rq ) { + rq = false; + { + boost::mutex::scoped_lock lk(m); + d.push_back(f); + } + } + } catch(std::exception& e) { + log() << "Server::doWork task:" << name() << " exception:" << e.what() << endl; + } + catch(const char *p) { + log() << "Server::doWork task:" << name() << " unknown c exception:" << + ((p&&strlen(p)<800)?p:"?") << endl; + } + catch(...) 
{
+                    log() << "Server::doWork unknown exception task:" << name() << endl;
+                }
+            }
+        }
+
+        static Server *s;
+        static void abc(int i) {
+            cout << "Hello " << i << endl;
+            s->requeue();
+        }
+        class TaskUnitTest : public mongo::UnitTest {
+        public:
+            virtual void run() {
+                lam f = boost::bind(abc, 3);
+                //f();
+
+                s = new Server("unittest");
+                fork(s);
+                s->send(f);
+
+                sleepsecs(30);
+                cout <<" done" << endl;
+
+            }
+        }; // not running. taskunittest;
+
+    }
+}
diff -Nru mongodb-1.4.4/util/concurrency/task.h mongodb-1.6.3/util/concurrency/task.h
--- mongodb-1.4.4/util/concurrency/task.h 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/concurrency/task.h 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,72 @@
+// @file task.h
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "../background.h"
+
+namespace mongo {
+
+    namespace task {
+
+        /** abstraction around threads.  simpler than BackgroundJob which is used behind the scenes.
+            allocate the Task dynamically.  when the thread terminates, the Task object will delete itself.
+        */
+        class Task : private BackgroundJob {
+        protected:
+            virtual void doWork() = 0;  // implement the task here.
+            virtual string name() = 0;  // name the thread
+        public:
+            Task();
+
+            /** for a repeating task, stop after current invocation ends. can be called by other threads
+                as long as the Task is still in scope.
+            */
+            void halt();
+        private:
+            unsigned n, repeat;
+            friend void fork(Task* t);
+            friend void repeat(Task* t, unsigned millis);
+            virtual void run();
+            virtual void ending() { }
+            void begin();
+        };
+
+        /** run once */
+        void fork(Task *t);
+
+        /** run doWork() over and over, with a pause between runs of millis */
+        void repeat(Task *t, unsigned millis);
+
+        /*** Example ***
+        inline void sample() {
+            class Sample : public Task {
+            public:
+                int result;
+                virtual void doWork() { result = 1234; }
+                Sample() : result(0) { }
+            };
+            shared_ptr<Sample> q( new Sample() );
+            fork(q);
+            cout << q->result << endl; // could print 1234 or 0.
+        }
+        */
+
+    }
+
+}
diff -Nru mongodb-1.4.4/util/concurrency/thread_pool.cpp mongodb-1.6.3/util/concurrency/thread_pool.cpp
--- mongodb-1.4.4/util/concurrency/thread_pool.cpp 1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/concurrency/thread_pool.cpp 2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,138 @@
+/* threadpool.cpp
+*/
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "pch.h" +#include "thread_pool.h" +#include "mvar.h" + +namespace mongo{ + namespace threadpool{ + + // Worker thread + class Worker : boost::noncopyable { + public: + explicit Worker(ThreadPool& owner) + : _owner(owner) + , _is_done(true) + , _thread(boost::bind(&Worker::loop, this)) + {} + + // destructor will block until current operation is completed + // Acts as a "join" on this thread + ~Worker(){ + _task.put(Task()); + _thread.join(); + } + + void set_task(Task& func){ + assert(!func.empty()); + assert(_is_done); + _is_done = false; + + _task.put(func); + } + + private: + ThreadPool& _owner; + MVar _task; + bool _is_done; // only used for error detection + boost::thread _thread; + + void loop(){ + while (true) { + Task task = _task.take(); + if (task.empty()) + break; // ends the thread + + try { + task(); + } catch (std::exception e){ + log() << "Unhandled exception in worker thread: " << e.what() << endl;; + } catch (...){ + log() << "Unhandled non-exception in worker thread" << endl; + } + _is_done = true; + _owner.task_done(this); + } + } + }; + + ThreadPool::ThreadPool(int nThreads) + : _mutex("ThreadPool"), _tasksRemaining(0) + , _nThreads(nThreads) + { + scoped_lock lock(_mutex); + while (nThreads-- > 0){ + Worker* worker = new Worker(*this); + _freeWorkers.push_front(worker); + } + } + + ThreadPool::~ThreadPool(){ + join(); + + assert(_tasks.empty()); + + // O(n) but n should be small + assert(_freeWorkers.size() == (unsigned)_nThreads); + + while(!_freeWorkers.empty()){ + delete _freeWorkers.front(); + _freeWorkers.pop_front(); + } + } + + void ThreadPool::join(){ + scoped_lock lock(_mutex); + while(_tasksRemaining){ + _condition.wait(lock.boost()); + } + } + + void ThreadPool::schedule(Task task){ + scoped_lock lock(_mutex); + + _tasksRemaining++; + + if (!_freeWorkers.empty()){ + _freeWorkers.front()->set_task(task); + _freeWorkers.pop_front(); + }else{ + _tasks.push_back(task); + } + } + + // should only be called by a worker from the worker thread + void ThreadPool::task_done(Worker* worker){ + scoped_lock lock(_mutex); + + if (!_tasks.empty()){ + worker->set_task(_tasks.front()); + _tasks.pop_front(); + }else{ + _freeWorkers.push_front(worker); + } + + _tasksRemaining--; + + if(_tasksRemaining == 0) + _condition.notify_all(); + } + + } //namespace threadpool +} //namespace mongo diff -Nru mongodb-1.4.4/util/concurrency/thread_pool.h mongodb-1.6.3/util/concurrency/thread_pool.h --- mongodb-1.4.4/util/concurrency/thread_pool.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/thread_pool.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,80 @@ +// thread_pool.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
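 *
 * Usage sketch for the ThreadPool declared below (editor's illustration;
 * countWords is a hypothetical function, and bound arguments are copied by
 * boost::bind, so they should be cheap to copy):
 *
 *     void countWords(string path);
 *
 *     ThreadPool pool(4);                        // 4 workers started up front
 *     pool.schedule(countWords, string("a.txt"));
 *     pool.schedule(countWords, string("b.txt"));
 *     pool.join();                               // blocks until tasks_remaining() == 0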
+ */ + +#include +#include +#undef assert +#define assert MONGO_assert + +namespace mongo { + +namespace threadpool { + class Worker; + + typedef boost::function Task; //nullary function or functor + + // exported to the mongo namespace + class ThreadPool : boost::noncopyable{ + public: + explicit ThreadPool(int nThreads=8); + + // blocks until all tasks are complete (tasks_remaining() == 0) + // You should not call schedule while in the destructor + ~ThreadPool(); + + // blocks until all tasks are complete (tasks_remaining() == 0) + // does not prevent new tasks from being scheduled so could wait forever. + // Also, new tasks could be scheduled after this returns. + void join(); + + // task will be copied a few times so make sure it's relatively cheap + void schedule(Task task); + + // Helpers that wrap schedule and boost::bind. + // Functor and args will be copied a few times so make sure it's relatively cheap + template + void schedule(F f, A a){ schedule(boost::bind(f,a)); } + template + void schedule(F f, A a, B b){ schedule(boost::bind(f,a,b)); } + template + void schedule(F f, A a, B b, C c){ schedule(boost::bind(f,a,b,c)); } + template + void schedule(F f, A a, B b, C c, D d){ schedule(boost::bind(f,a,b,c,d)); } + template + void schedule(F f, A a, B b, C c, D d, E e){ schedule(boost::bind(f,a,b,c,d,e)); } + + int tasks_remaining() { return _tasksRemaining; } + + private: + mongo::mutex _mutex; + boost::condition _condition; + + list _freeWorkers; //used as LIFO stack (always front) + list _tasks; //used as FIFO queue (push_back, pop_front) + int _tasksRemaining; // in queue + currently processing + int _nThreads; // only used for sanity checking. could be removed in the future. + + // should only be called by a worker from the worker's thread + void task_done(Worker* worker); + friend class Worker; + }; + +} //namespace threadpool + +using threadpool::ThreadPool; + +} //namespace mongo diff -Nru mongodb-1.4.4/util/concurrency/value.h mongodb-1.6.3/util/concurrency/value.h --- mongodb-1.4.4/util/concurrency/value.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/value.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,85 @@ +/* @file value.h + concurrency helpers Atomic and DiagStr +*/ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +namespace mongo { + + extern mutex _atomicMutex; + + /** atomic wrapper for a value. enters a mutex on each access. must + be copyable. + */ + template + class Atomic : boost::noncopyable { + T val; + public: + Atomic() { } + + void operator=(const T& a) { + scoped_lock lk(_atomicMutex); + val = a; } + + operator T() const { + scoped_lock lk(_atomicMutex); + return val; } + + /** example: + Atomic q; + ... 
+ { + Atomic::tran t(q); + if( q.ref() > 0 ) + q.ref()--; + } + */ + class tran : private scoped_lock { + Atomic& _a; + public: + tran(Atomic& a) : scoped_lock(_atomicMutex), _a(a) { } + T& ref() { return _a.val; } + }; + }; + + /** this string COULD be mangled but with the double buffering, assuming writes + are infrequent, it's unlikely. thus, this is reasonable for lockless setting of + diagnostic strings, where their content isn't critical. + */ + class DiagStr { + char buf1[256]; + char buf2[256]; + char *p; + public: + DiagStr() { + memset(buf1, 0, 256); + memset(buf2, 0, 256); + p = buf1; + } + + const char * get() const { return p; } + + void set(const char *s) { + char *q = (p==buf1) ? buf2 : buf1; + strncpy(q, s, 255); + p = q; + } + }; + +} diff -Nru mongodb-1.4.4/util/concurrency/vars.cpp mongodb-1.6.3/util/concurrency/vars.cpp --- mongodb-1.4.4/util/concurrency/vars.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/concurrency/vars.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,54 @@ +// vars.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "value.h" +#include "mutex.h" + +namespace mongo { + + mutex _atomicMutex("_atomicMutex"); + + // intentional leak. otherwise destructor orders can be problematic at termination. + MutexDebugger &mutexDebugger = *(new MutexDebugger()); + + MutexDebugger::MutexDebugger() : + x( *(new boost::mutex()) ), magic(0x12345678) { + // optional way to debug lock order + /* + a = "a_lock"; + b = "b_lock"; + */ + } + + void MutexDebugger::programEnding() { + if( logLevel>=1 && followers.size() ) { + std::cout << followers.size() << " mutexes in program" << endl; + for( map< mid, set >::iterator i = followers.begin(); i != followers.end(); i++ ) { + cout << i->first; + if( maxNest[i->first] > 1 ) + cout << " maxNest:" << maxNest[i->first]; + cout << '\n'; + for( set::iterator j = i->second.begin(); j != i->second.end(); j++ ) + cout << " " << *j << '\n'; + } + cout.flush(); + } + } + +} diff -Nru mongodb-1.4.4/util/debug_util.cpp mongodb-1.6.3/util/debug_util.cpp --- mongodb-1.4.4/util/debug_util.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/debug_util.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "../db/cmdline.h" #include "../db/jsobj.h" diff -Nru mongodb-1.4.4/util/debug_util.h mongodb-1.6.3/util/debug_util.h --- mongodb-1.4.4/util/debug_util.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/debug_util.h 2010-09-24 10:02:42.000000000 -0700 @@ -19,7 +19,7 @@ #ifndef _WIN32 #include -#endif // ndef _WIN32 +#endif namespace mongo { @@ -39,42 +39,47 @@ char string[400]; } *OWS; -// for now, running on win32 means development not production -- -// use this to log things just there. 
-#if defined(_WIN32) -#define WIN if( 1 ) -#else -#define WIN if( 0 ) -#endif - #if defined(_DEBUG) -#define DEV if( 1 ) + enum {DEBUG_BUILD = 1}; #else -#define DEV if( 0 ) + enum {DEBUG_BUILD = 0}; #endif -#define DEBUGGING if( 0 ) +#define MONGO_DEV if( DEBUG_BUILD ) +#define DEV MONGO_DEV + +#define MONGO_DEBUGGING if( 0 ) +#define DEBUGGING MONGO_DEBUGGING // The following declare one unique counter per enclosing function. // NOTE The implementation double-increments on a match, but we don't really care. -#define SOMETIMES( occasion, howOften ) for( static unsigned occasion = 0; ++occasion % howOften == 0; ) -#define OCCASIONALLY SOMETIMES( occasionally, 16 ) -#define RARELY SOMETIMES( rarely, 128 ) -#define ONCE for( static bool undone = true; undone; undone = false ) +#define MONGO_SOMETIMES( occasion, howOften ) for( static unsigned occasion = 0; ++occasion % howOften == 0; ) +#define SOMETIMES MONGO_SOMETIMES + +#define MONGO_OCCASIONALLY SOMETIMES( occasionally, 16 ) +#define OCCASIONALLY MONGO_OCCASIONALLY + +#define MONGO_RARELY SOMETIMES( rarely, 128 ) +#define RARELY MONGO_RARELY + +#define MONGO_ONCE for( static bool undone = true; undone; undone = false ) +#define ONCE MONGO_ONCE #if defined(_WIN32) -#define strcasecmp _stricmp + inline int strcasecmp(const char* s1, const char* s2) {return _stricmp(s1, s2);} #endif // Sets SIGTRAP handler to launch GDB // Noop unless on *NIX and compiled with _DEBUG void setupSIGTRAPforGDB(); -#if defined(_WIN32) - inline void breakpoint() {} //noop -#else // defined(_WIN32) - // code to raise a breakpoint in GDB + extern int tlogLevel; + inline void breakpoint(){ + if ( tlogLevel < 0 ) + return; +#ifndef _WIN32 + // code to raise a breakpoint in GDB ONCE { //prevent SIGTRAP from crashing the program if default action is specified and we are not in gdb struct sigaction current; @@ -83,10 +88,11 @@ signal(SIGTRAP, SIG_IGN); } } - + raise(SIGTRAP); +#endif } -#endif // defined(_WIN32) + // conditional breakpoint inline void breakif(bool test){ diff -Nru mongodb-1.4.4/util/embedded_builder.h mongodb-1.6.3/util/embedded_builder.h --- mongodb-1.4.4/util/embedded_builder.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/embedded_builder.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,6 +18,7 @@ #pragma once namespace mongo { + // utility class for assembling hierarchical objects class EmbeddedBuilder { public: @@ -49,7 +50,7 @@ return; } prepareContext( name ); - back()->appendAs( e, name.c_str() ); + back()->appendAs( e, name ); } BufBuilder &subarrayStartAs( string name ) { prepareContext( name ); diff -Nru mongodb-1.4.4/util/file_allocator.h mongodb-1.6.3/util/file_allocator.h --- mongodb-1.4.4/util/file_allocator.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/file_allocator.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -//file_allocator.h +// @file file_allocator.h /* Copyright 2009 10gen Inc. * @@ -15,10 +15,10 @@ * limitations under the License. 
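 *
 * Aside on the debug_util.h counter macros above (editor's sketch, not from
 * the patch): each expands to a static per-callsite counter, which is handy
 * for rate-limiting log lines in a hot path such as this hypothetical loop:
 *
 *     void pollLoop() {
 *         OCCASIONALLY log() << "still polling" << endl;  // every 16th call
 *         RARELY log() << "heartbeat" << endl;            // every 128th call
 *         ONCE log() << "first poll" << endl;             // first call only
 *     }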
*/ -#include "../stdafx.h" +#include "../pch.h" #include #include -#if defined(__freebsd__) +#if defined(__freebsd__) || defined(__openbsd__) #include #endif @@ -38,7 +38,7 @@ */ public: #if !defined(_WIN32) - FileAllocator() : failed_() {} + FileAllocator() : pendingMutex_("FileAllocator"), failed_() {} #endif void start() { #if !defined(_WIN32) @@ -106,6 +106,53 @@ #endif } + static void ensureLength( int fd , long size ){ + +#if defined(_WIN32) + // we don't zero on windows + // TODO : we should to avoid fragmentation +#else + +#if defined(__linux__) + int ret = posix_fallocate(fd,0,size); + if ( ret == 0 ) + return; + + log() << "posix_fallocate failed: " << errnoWithDescription( ret ) << " falling back" << endl; +#endif + + off_t filelen = lseek(fd, 0, SEEK_END); + if ( filelen < size ) { + if (filelen != 0) { + stringstream ss; + ss << "failure creating new datafile; lseek failed for fd " << fd << " with errno: " << errnoWithDescription(); + massert( 10440 , ss.str(), filelen == 0 ); + } + // Check for end of disk. + massert( 10441 , "Unable to allocate file of desired size", + size - 1 == lseek(fd, size - 1, SEEK_SET) ); + massert( 10442 , "Unable to allocate file of desired size", + 1 == write(fd, "", 1) ); + lseek(fd, 0, SEEK_SET); + + const long z = 256 * 1024; + const boost::scoped_array buf_holder (new char[z]); + char* buf = buf_holder.get(); + memset(buf, 0, z); + long left = size; + while ( left > 0 ) { + long towrite = left; + if ( towrite > z ) + towrite = z; + + int written = write( fd , buf , towrite ); + massert( 10443 , errnoWithPrefix("write failed" ), written > 0 ); + left -= written; + } + } +#endif + } + private: #if !defined(_WIN32) void checkFailure() { @@ -161,42 +208,26 @@ long fd = open(name.c_str(), O_CREAT | O_RDWR | O_NOATIME, S_IRUSR | S_IWUSR); if ( fd <= 0 ) { stringstream ss; - ss << "couldn't open " << name << ' ' << OUTPUT_ERRNO; + ss << "couldn't open " << name << ' ' << errnoWithDescription(); massert( 10439 , ss.str(), fd <= 0 ); } #if defined(POSIX_FADV_DONTNEED) if( posix_fadvise(fd, 0, size, POSIX_FADV_DONTNEED) ) { - log() << "warning: posix_fadvise fails " << name << ' ' << OUTPUT_ERRNO << endl; + log() << "warning: posix_fadvise fails " << name << ' ' << errnoWithDescription() << endl; } #endif - + + Timer t; + /* make sure the file is the full desired length */ - off_t filelen = lseek(fd, 0, SEEK_END); - if ( filelen < size ) { - massert( 10440 , "failure creating new datafile", filelen == 0 ); - // Check for end of disk. - massert( 10441 , "Unable to allocate file of desired size", - size - 1 == lseek(fd, size - 1, SEEK_SET) ); - massert( 10442 , "Unable to allocate file of desired size", - 1 == write(fd, "", 1) ); - lseek(fd, 0, SEEK_SET); - Timer t; - long z = 256 * 1024; - char buf[z]; - memset(buf, 0, z); - long left = size; - while ( left > 0 ) { - long towrite = left; - if ( towrite > z ) - towrite = z; - - int written = write( fd , buf , towrite ); - massert( 10443 , errnostring("write failed" ), written > 0 ); - left -= written; - } - log() << "done allocating datafile " << name << ", size: " << size/1024/1024 << "MB, took " << ((double)t.millis())/1000.0 << " secs" << endl; - } + ensureLength( fd , size ); + + log() << "done allocating datafile " << name << ", " + << "size: " << size/1024/1024 << "MB, " + << " took " << ((double)t.millis())/1000.0 << " secs" + << endl; + close( fd ); } catch ( ... 
) { diff -Nru mongodb-1.4.4/util/file.h mongodb-1.6.3/util/file.h --- mongodb-1.4.4/util/file.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/file.h 2010-09-24 10:02:42.000000000 -0700 @@ -27,6 +27,8 @@ #include #endif +#include "text.h" + namespace mongo { #ifndef __sunos__ @@ -47,7 +49,6 @@ #if defined(_WIN32) #include -std::wstring toWideString(const char *s); class File : public FileInterface { HANDLE fd; @@ -68,9 +69,9 @@ fd = INVALID_HANDLE_VALUE; } void open(const char *filename, bool readOnly=false ) { - std::wstring filenamew = toWideString(filename); fd = CreateFile( - filenamew.c_str(), ( readOnly ? 0 : GENERIC_WRITE ) | GENERIC_READ, FILE_SHARE_READ, + toNativeString(filename).c_str(), + ( readOnly ? 0 : GENERIC_WRITE ) | GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); if( !is_open() ) { out() << "CreateFile failed " << filename << endl; @@ -118,7 +119,7 @@ void err(bool ok) { if( !ok && !_bad ) { _bad = true; - log() << "File I/O " << OUTPUT_ERRNO << '\n'; + log() << "File I/O " << errnoWithDescription() << '\n'; } } public: @@ -137,9 +138,11 @@ #endif void open(const char *filename, bool readOnly=false ) { - fd = ::open(filename, O_CREAT | ( readOnly ? 0 : O_RDWR ) | O_NOATIME, S_IRUSR | S_IWUSR); + fd = ::open(filename, + O_CREAT | ( readOnly ? 0 : ( O_RDWR | O_NOATIME ) ) , + S_IRUSR | S_IWUSR); if ( fd <= 0 ) { - out() << "couldn't open " << filename << ' ' << OUTPUT_ERRNO << endl; + out() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl; return; } _bad = false; diff -Nru mongodb-1.4.4/util/goodies.h mongodb-1.6.3/util/goodies.h --- mongodb-1.4.4/util/goodies.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/goodies.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// goodies.h +// @file goodies.h // miscellaneous junk /* Copyright 2009 10gen Inc. 
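// Usage sketch for the helpers added in the goodies.h hunk below (editor's
// illustration): ToString stringifies any streamable value in one call, and
// setThreadName labels the current thread.
//
//     int port = 27017;
//     string msg = "listening on port " + ToString(port);
//     setThreadName("conn1");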
@@ -18,13 +18,22 @@ #pragma once -#if defined(_WIN32) -# include -#endif +#include "../bson/util/misc.h" +#include "concurrency/mutex.h" namespace mongo { -#if !defined(_WIN32) && !defined(NOEXECINFO) && !defined(__freebsd__) && !defined(__sun__) + void setThreadName(const char * name); + string getThreadName(); + + template + inline string ToString(const T& t) { + stringstream s; + s << t; + return s.str(); + } + +#if !defined(_WIN32) && !defined(NOEXECINFO) && !defined(__freebsd__) && !defined(__openbsd__) && !defined(__sun__) } // namespace mongo @@ -52,7 +61,7 @@ o << '\n'; for (i = 0; i < size; i++) o << ' ' << strings[i] << '\n'; - + o.flush(); free (strings); } #else @@ -105,15 +114,14 @@ } // PRINT(2+2); prints "2+2: 4" -#define PRINT(x) cout << #x ": " << (x) << endl +#define MONGO_PRINT(x) cout << #x ": " << (x) << endl +#define PRINT MONGO_PRINT // PRINTFL; prints file:line -#define PRINTFL cout << __FILE__ ":" << __LINE__ << endl - -#undef yassert +#define MONGO_PRINTFL cout << __FILE__ ":" << __LINE__ << endl +#define PRINTFL MONGO_PRINTFL #undef assert -#define assert xassert -#define yassert 1 +#define assert MONGO_assert struct WrappingInt { WrappingInt() { @@ -139,22 +147,6 @@ } }; -} // namespace mongo - -#include - -namespace mongo { - - inline void time_t_to_String(time_t t, char *buf) { -#if defined(_WIN32) - ctime_s(buf, 64, &t); -#else - ctime_r(&t, buf); -#endif - buf[24] = 0; // don't want the \n - } - - inline void time_t_to_Struct(time_t t, struct tm * buf , bool local = false ) { #if defined(_WIN32) if ( local ) @@ -169,12 +161,26 @@ #endif } - - -#define asctime _asctime_not_threadsafe_ -#define gmtime _gmtime_not_threadsafe_ -#define localtime _localtime_not_threadsafe_ -#define ctime _ctime_is_not_threadsafe_ + // uses ISO 8601 dates without trailing Z + // colonsOk should be false when creating filenames + inline string terseCurrentTime(bool colonsOk=true){ + struct tm t; + time_t_to_Struct( time(0) , &t ); + + const char* fmt = (colonsOk ? 
"%Y-%m-%dT%H:%M:%S" : "%Y-%m-%dT%H-%M-%S"); + char buf[32]; + assert(strftime(buf, sizeof(buf), fmt, &t) == 19); + return buf; + } + +#define MONGO_asctime _asctime_not_threadsafe_ +#define asctime MONGO_asctime +#define MONGO_gmtime _gmtime_not_threadsafe_ +#define gmtime MONGO_gmtime +#define MONGO_localtime _localtime_not_threadsafe_ +#define localtime MONGO_localtime +#define MONGO_ctime _ctime_is_not_threadsafe_ +#define ctime MONGO_ctime #if defined(_WIN32) || defined(__sunos__) inline void sleepsecs(int s) { @@ -183,24 +189,24 @@ xt.sec += s; boost::thread::sleep(xt); } - inline void sleepmillis(int s) { + inline void sleepmillis(long long s) { boost::xtime xt; boost::xtime_get(&xt, boost::TIME_UTC); - xt.sec += ( s / 1000 ); - xt.nsec += ( s % 1000 ) * 1000000; + xt.sec += (int)( s / 1000 ); + xt.nsec += (int)(( s % 1000 ) * 1000000); if ( xt.nsec >= 1000000000 ) { xt.nsec -= 1000000000; xt.sec++; } boost::thread::sleep(xt); } - inline void sleepmicros(int s) { + inline void sleepmicros(long long s) { if ( s <= 0 ) return; boost::xtime xt; boost::xtime_get(&xt, boost::TIME_UTC); - xt.sec += ( s / 1000000 ); - xt.nsec += ( s % 1000000 ) * 1000; + xt.sec += (int)( s / 1000000 ); + xt.nsec += (int)(( s % 1000000 ) * 1000); if ( xt.nsec >= 1000000000 ) { xt.nsec -= 1000000000; xt.sec++; @@ -216,22 +222,23 @@ cout << "nanosleep failed" << endl; } } - inline void sleepmicros(int s) { + inline void sleepmicros(long long s) { if ( s <= 0 ) return; struct timespec t; t.tv_sec = (int)(s / 1000000); - t.tv_nsec = s % 1000000; - if ( nanosleep( &t , 0 ) ){ + t.tv_nsec = 1000 * ( s % 1000000 ); + struct timespec out; + if ( nanosleep( &t , &out ) ){ cout << "nanosleep failed" << endl; } } - inline void sleepmillis(int s) { + inline void sleepmillis(long long s) { sleepmicros( s * 1000 ); } #endif -// note this wraps + // note this wraps inline int tdiff(unsigned told, unsigned tnew) { return WrappingInt::diff(tnew, told); } @@ -242,15 +249,6 @@ return (xt.sec & 0xfffff) * 1000 + t; } - struct Date_t { - // TODO: make signed (and look for related TODO's) - unsigned long long millis; - Date_t(): millis(0) {} - Date_t(unsigned long long m): millis(m) {} - operator unsigned long long&() { return millis; } - operator const unsigned long long&() const { return millis; } - }; - inline Date_t jsTime() { boost::xtime xt; boost::xtime_get(&xt, boost::TIME_UTC); @@ -273,44 +271,7 @@ unsigned secs = xt.sec % 1024; return secs*1000000 + t; } - using namespace boost; - - extern bool __destroyingStatics; - - // If you create a local static instance of this class, that instance will be destroyed - // before all global static objects are destroyed, so __destroyingStatics will be set - // to true before the global static variables are destroyed. - class StaticObserver : boost::noncopyable { - public: - ~StaticObserver() { __destroyingStatics = true; } - }; - - // On pthread systems, it is an error to destroy a mutex while held. Static global - // mutexes may be held upon shutdown in our implementation, and this way we avoid - // destroying them. 
- class mutex : boost::noncopyable { - public: - mutex() { new (_buf) boost::mutex(); } - ~mutex() { - if( !__destroyingStatics ) { - boost().boost::mutex::~mutex(); - } - } - class scoped_lock : boost::noncopyable { - public: - scoped_lock( mongo::mutex &m ) : _l( m.boost() ) {} - boost::mutex::scoped_lock &boost() { return _l; } - private: - boost::mutex::scoped_lock _l; - }; - private: - boost::mutex &boost() { return *( boost::mutex * )( _buf ); } - char _buf[ sizeof( boost::mutex ) ]; - }; - typedef mongo::mutex::scoped_lock scoped_lock; - typedef boost::recursive_mutex::scoped_lock recursive_scoped_lock; - // simple scoped timer class Timer { public: @@ -320,17 +281,17 @@ Timer( unsigned long long start ) { old = start; } - int seconds(){ + int seconds() const { return (int)(micros() / 1000000); } - int millis() { + int millis() const { return (long)(micros() / 1000); } - unsigned long long micros() { + unsigned long long micros() const { unsigned long long n = curTimeMicros64(); return n - old; } - unsigned long long micros(unsigned long long & n) { // returns cur time in addition to timer result + unsigned long long micros(unsigned long long & n) const { // returns cur time in addition to timer result n = curTimeMicros64(); return n - old; } @@ -364,6 +325,7 @@ if ( strlen(str) < l ) return false; return strncmp(str, prefix, l) == 0; } + inline bool startsWith(string s, string p) { return startsWith(s.c_str(), p.c_str()); } inline bool endsWith(const char *p, const char *suffix) { size_t a = strlen(p); @@ -372,12 +334,6 @@ return strcmp(p + a - b, suffix) == 0; } -} // namespace mongo - -#include "boost/detail/endian.hpp" - -namespace mongo { - inline unsigned long swapEndian(unsigned long x) { return ((x & 0xff) << 24) | @@ -395,15 +351,6 @@ return swapEndian(x); } #endif - - // Like strlen, but only scans up to n bytes. - // Returns -1 if no '0' found. 
- inline int strnlen( const char *s, int n ) { - for( int i = 0; i < n; ++i ) - if ( !s[ i ] ) - return i; - return -1; - } #if !defined(_WIN32) typedef int HANDLE; @@ -446,7 +393,7 @@ boost::thread_specific_ptr _val; }; - class ProgressMeter { + class ProgressMeter : boost::noncopyable { public: ProgressMeter( long long total , int secondsBetween = 3 , int checkInterval = 100 ){ reset( total , secondsBetween , checkInterval ); @@ -513,6 +460,10 @@ buf << _done << "/" << _total << " " << (_done*100)/_total << "%"; return buf.str(); } + + bool operator==( const ProgressMeter& other ) const { + return this == &other; + } private: bool _active; @@ -526,9 +477,39 @@ int _lastTime; }; + class ProgressMeterHolder : boost::noncopyable { + public: + ProgressMeterHolder( ProgressMeter& pm ) + : _pm( pm ){ + } + + ~ProgressMeterHolder(){ + _pm.finished(); + } + + ProgressMeter* operator->(){ + return &_pm; + } + + bool hit( int n = 1 ){ + return _pm.hit( n ); + } + + void finished(){ + _pm.finished(); + } + + bool operator==( const ProgressMeter& other ){ + return _pm == other; + } + + private: + ProgressMeter& _pm; + }; + class TicketHolder { public: - TicketHolder( int num ){ + TicketHolder( int num ) : _mutex("TicketHolder") { _outof = num; _num = num; } @@ -562,14 +543,16 @@ _num = _outof - used; } - int available(){ + int available() const { return _num; } - int used(){ + int used() const { return _outof - _num; } + int outof() const { return _outof; } + private: int _outof; int _num; @@ -611,7 +594,7 @@ _buf = 0; } - operator string() const { + string toString() const { string s = _buf; return s; } @@ -634,11 +617,11 @@ } bool operator!=( const char * str ) const { - return strcmp( _buf , str ); + return strcmp( _buf , str ) != 0; } bool empty() const { - return _buf[0] == 0; + return _buf == 0 || _buf[0] == 0; } private: @@ -651,6 +634,20 @@ inline bool isNumber( char c ) { return c >= '0' && c <= '9'; } + + inline unsigned stringToNum(const char *str) { + unsigned x = 0; + const char *p = str; + while( 1 ) { + if( !isNumber(*p) ) { + if( *p == 0 && p != str ) + break; + throw 0; + } + x = x * 10 + *p++ - '0'; + } + return x; + } // for convenience, '{' is greater than anything and stops number parsing inline int lexNumCmp( const char *s1, const char *s2 ) { @@ -703,5 +700,46 @@ return -1; return 0; } - + + /** A generic pointer type for function arguments. + * It will convert from any pointer type except auto_ptr. 
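 * For example (editor's illustration), a function declared as
 *     void dump( ptr<BSONObj> o );
 * can be called with a BSONObj*, a shared_ptr<BSONObj>, or a
 * scoped_ptr<BSONObj>, with no explicit conversion at the call site.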
+ * Semantics are the same as passing the pointer returned from get() + * const ptr => T * const + * ptr => T const * or const T* + */ + template + struct ptr{ + + ptr() : _p(NULL) {} + + // convert to ptr + ptr(T* p) : _p(p) {} // needed for NULL + template ptr(U* p) : _p(p) {} + template ptr(const ptr& p) : _p(p) {} + template ptr(const boost::shared_ptr& p) : _p(p.get()) {} + template ptr(const boost::scoped_ptr& p) : _p(p.get()) {} + //template ptr(const auto_ptr& p) : _p(p.get()) {} + + // assign to ptr + ptr& operator= (T* p) { _p = p; return *this; } // needed for NULL + template ptr& operator= (U* p) { _p = p; return *this; } + template ptr& operator= (const ptr& p) { _p = p; return *this; } + template ptr& operator= (const boost::shared_ptr& p) { _p = p.get(); return *this; } + template ptr& operator= (const boost::scoped_ptr& p) { _p = p.get(); return *this; } + //template ptr& operator= (const auto_ptr& p) { _p = p.get(); return *this; } + + // use + T* operator->() const { return _p; } + T& operator*() const { return *_p; } + + // convert from ptr + operator T* () const { return _p; } + + private: + T* _p; + }; + + /** Hmmmm */ + using namespace boost; + } // namespace mongo diff -Nru mongodb-1.4.4/util/hashtab.h mongodb-1.6.3/util/hashtab.h --- mongodb-1.4.4/util/hashtab.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/hashtab.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,7 +22,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include namespace mongo { @@ -36,7 +36,8 @@ template < class Key, - class Type + class Type, + class PTR > class HashTable : boost::noncopyable { public: @@ -51,10 +52,15 @@ void setUnused() { hash = 0; } - } *nodes; + }; + PTR _buf; int n; int maxChain; + Node& nodes(int i) { + return *((Node*) _buf.at(i * sizeof(Node), sizeof(Node))); + } + int _find(const Key& k, bool& found) { found = false; int h = k.hash(); @@ -63,12 +69,12 @@ int chain = 0; int firstNonUsed = -1; while ( 1 ) { - if ( !nodes[i].inUse() ) { + if ( !nodes(i).inUse() ) { if ( firstNonUsed < 0 ) firstNonUsed = i; } - if ( nodes[i].hash == h && nodes[i].k == k ) { + if ( nodes(i).hash == h && nodes(i).k == k ) { if ( chain >= 200 ) out() << "warning: hashtable " << name << " long chain " << endl; found = true; @@ -92,24 +98,28 @@ public: /* buf must be all zeroes on initialization. 
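 *
 * Editor's note, inferred from the methods below: the Key type must supply
 * hash(), kill(), and operator==, roughly:
 *
 *     struct SomeKey {                          // hypothetical key type
 *         int hash() const;                     // nonzero for a live key; 0 marks a slot unused
 *         void kill();                          // invalidate the key in place
 *         bool operator==(const SomeKey& r) const;
 *     };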
*/ - HashTable(void *buf, int buflen, const char *_name) : name(_name) { + HashTable(PTR buf, int buflen, const char *_name) : name(_name) { int m = sizeof(Node); // out() << "hashtab init, buflen:" << buflen << " m:" << m << endl; n = buflen / m; if ( (n & 1) == 0 ) n--; maxChain = (int) (n * 0.05); - nodes = (Node *) buf; + _buf = buf; + //nodes = (Node *) buf; + + if ( sizeof(Node) != 628 ){ + out() << "HashTable() " << _name << " sizeof(node):" << sizeof(Node) << " n:" << n << " sizeof(Key): " << sizeof(Key) << " sizeof(Type):" << sizeof(Type) << endl; + assert( sizeof(Node) == 628 ); + } - assert( sizeof(Node) == 628 ); - //out() << "HashTable() " << _name << " sizeof(node):" << sizeof(Node) << " n:" << n << endl; } Type* get(const Key& k) { bool found; int i = _find(k, found); if ( found ) - return &nodes[i].value; + return &nodes(i).value; return 0; } @@ -117,8 +127,9 @@ bool found; int i = _find(k, found); if ( i >= 0 && found ) { - nodes[i].k.kill(); - nodes[i].setUnused(); + Node& n = nodes(i); + n.k.kill(); + n.setUnused(); } } /* @@ -136,24 +147,34 @@ int i = _find(k, found); if ( i < 0 ) return false; + Node& n = nodes(i); if ( !found ) { - nodes[i].k = k; - nodes[i].hash = k.hash(); + n.k = k; + n.hash = k.hash(); } else { - assert( nodes[i].hash == k.hash() ); + assert( n.hash == k.hash() ); } - nodes[i].value = value; + n.value = value; return true; } typedef void (*IteratorCallback)( const Key& k , Type& v ); - void iterAll( IteratorCallback callback ){ for ( int i=0; i(inRaw); + for (int i=0; i> 4]; + char lo = hexchars[(c & 0x0F)]; + + out << hi << lo; + } + + return out.str(); + } + + inline string toHexLower(const void* inRaw, int len){ + static const char hexchars[] = "0123456789abcdef"; + + StringBuilder out; + const char* in = reinterpret_cast(inRaw); + for (int i=0; i> 4]; + char lo = hexchars[(c & 0x0F)]; + + out << hi << lo; + } + + return out.str(); + } } diff -Nru mongodb-1.4.4/util/histogram.cpp mongodb-1.6.3/util/histogram.cpp --- mongodb-1.4.4/util/histogram.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/histogram.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,129 @@ +// histogram.cc + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include +#include +#include + +#include "histogram.h" + +namespace mongo { + + using std::ostringstream; + using std::setfill; + using std::setw; + + Histogram::Histogram( const Options& opts ) + : _initialValue( opts.initialValue ) + , _numBuckets( opts.numBuckets ) + , _boundaries( new uint32_t[_numBuckets] ) + , _buckets( new uint64_t[_numBuckets] ){ + + // TODO more sanity checks + // + not too few buckets + // + initialBucket and bucketSize fit within 32 bit ints + + // _boundaries store the maximum value falling in that bucket. 
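            // Worked example (editor's note): with initialValue = 0, bucketSize = 125,
            // numBuckets = 4 and exponential = true, the loop below produces
            // boundaries 125, 250, 500, and the last bucket is then capped at
            // max_int by the assignment that follows.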
+ if ( opts.exponential ){ + uint32_t twoPow = 1; // 2^0 + for ( uint32_t i = 0; i < _numBuckets - 1; i++){ + _boundaries[i] = _initialValue + opts.bucketSize * twoPow; + twoPow *= 2; // 2^i+1 + } + } else { + _boundaries[0] = _initialValue + opts.bucketSize; + for ( uint32_t i = 1; i < _numBuckets - 1; i++ ){ + _boundaries[i] = _boundaries[ i-1 ] + opts.bucketSize; + } + } + _boundaries[ _numBuckets-1 ] = std::numeric_limits::max(); + + for ( uint32_t i = 0; i < _numBuckets; i++ ) { + _buckets[i] = 0; + } + } + + Histogram::~Histogram() { + delete [] _boundaries; + delete [] _buckets; + } + + void Histogram::insert( uint32_t element ){ + if ( element < _initialValue) return; + + _buckets[ _findBucket(element) ] += 1; + } + + string Histogram::toHTML() const{ + uint64_t max = 0; + for ( uint32_t i = 0; i < _numBuckets; i++ ){ + if ( _buckets[i] > max ){ + max = _buckets[i]; + } + } + if ( max == 0 ) { + return "histogram is empty\n"; + } + + // normalize buckets to max + const int maxBar = 20; + ostringstream ss; + for ( uint32_t i = 0; i < _numBuckets; i++ ){ + int barSize = _buckets[i] * maxBar / max; + ss << string( barSize,'*' ) + << setfill(' ') << setw( maxBar-barSize + 12 ) + << _boundaries[i] << '\n'; + } + + return ss.str(); + } + + uint64_t Histogram::getCount( uint32_t bucket ) const { + if ( bucket >= _numBuckets ) return 0; + + return _buckets[ bucket ]; + } + + uint32_t Histogram::getBoundary( uint32_t bucket ) const { + if ( bucket >= _numBuckets ) return 0; + + return _boundaries[ bucket ]; + } + + uint32_t Histogram::getBucketsNum() const { + return _numBuckets; + } + + uint32_t Histogram::_findBucket( uint32_t element ) const{ + // TODO assert not too small a value? + + uint32_t low = 0; + uint32_t high = _numBuckets - 1; + while ( low < high ){ + // low + ( (high - low) / 2 ); + uint32_t mid = ( low + high ) >> 1; + if ( element > _boundaries[ mid ] ){ + low = mid + 1; + } else { + high = mid; + } + } + return low; + } + +} // namespace mongo diff -Nru mongodb-1.4.4/util/histogram.h mongodb-1.6.3/util/histogram.h --- mongodb-1.4.4/util/histogram.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/histogram.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,128 @@ +// histogram.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef UTIL_HISTOGRAM_HEADER +#define UTIL_HISTOGRAM_HEADER + +#include "../pch.h" + +#include + +namespace mongo { + + using std::string; + + /** + * A histogram for a 32-bit integer range. + */ + class Histogram { + public: + /** + * Construct a histogram with 'numBuckets' buckets, optionally + * having the first bucket start at 'initialValue' rather than + * 0. By default, the histogram buckets will be 'bucketSize' wide. 
+ * + * Usage example: + * Histogram::Options opts; + * opts.numBuckets = 3; + * opts.bucketSize = 10; + * Histogram h( opts ); + * + * Generates the bucket ranges [0..10],[11..20],[21..max_int] + * + * Alternatively, the flag 'exponential' could be turned on, in + * which case a bucket's maximum value will be + * initialValue + bucketSize * 2 ^ [0..numBuckets-1] + * + * Usage example: + * Histogram::Options opts; + * opts.numBuckets = 4; + * opts.bucketSize = 125; + * opts.exponential = true; + * Histogram h( opts ); + * + * Generates the bucket ranges [0..125],[126..250],[251..500],[501..max_int] + */ + struct Options { + boost::uint32_t numBuckets; + boost::uint32_t bucketSize; + boost::uint32_t initialValue; + + // use exponential buckets? + bool exponential; + + Options() + : numBuckets(0) + , bucketSize(0) + , initialValue(0) + , exponential(false){} + }; + explicit Histogram( const Options& opts ); + ~Histogram(); + + /** + * Find the bucket that 'element' falls into and increment its count. + */ + void insert( boost::uint32_t element ); + + /** + * Render the histogram as string that can be used inside an + * HTML doc. + */ + string toHTML() const; + + // testing interface below -- consider it private + + /** + * Return the count for the 'bucket'-th bucket. + */ + boost::uint64_t getCount( boost::uint32_t bucket ) const; + + /** + * Return the maximum element that would fall in the + * 'bucket'-th bucket. + */ + boost::uint32_t getBoundary( boost::uint32_t bucket ) const; + + /** + * Return the number of buckets in this histogram. + */ + boost::uint32_t getBucketsNum() const; + + private: + /** + * Returns the bucket where 'element' should fall + * into. Currently assumes that 'element' is greater than the + * minimum 'inialValue'. + */ + boost::uint32_t _findBucket( boost::uint32_t element ) const; + + boost::uint32_t _initialValue; // no value lower than it is recorded + boost::uint32_t _numBuckets; // total buckets in the histogram + + // all below owned here + boost::uint32_t* _boundaries; // maximum element of each bucket + boost::uint64_t* _buckets; // current count of each bucket + + Histogram( const Histogram& ); + Histogram& operator=( const Histogram& ); + }; + +} // namespace mongo + +#endif // UTIL_HISTOGRAM_HEADER diff -Nru mongodb-1.4.4/util/hostandport.h mongodb-1.6.3/util/hostandport.h --- mongodb-1.4.4/util/hostandport.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/hostandport.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,142 @@ +// hostandport.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "sock.h" +#include "../db/cmdline.h" +#include "mongoutils/str.h" + +namespace mongo { + + using namespace mongoutils; + + /** helper for manipulating host:port connection endpoints. + */ + struct HostAndPort { + HostAndPort() : _port(-1) { } + + /** From a string hostname[:portnumber] + Throws user assertion if bad config string or bad port #. 
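 *
 * e.g. (editor's illustration):
 *     HostAndPort h1("db1.example.com:27017");  // explicit port
 *     HostAndPort h2("db1.example.com");        // port() returns the default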
diff -Nru mongodb-1.4.4/util/hostandport.h mongodb-1.6.3/util/hostandport.h --- mongodb-1.4.4/util/hostandport.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/hostandport.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,142 @@ +// hostandport.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "sock.h" +#include "../db/cmdline.h" +#include "mongoutils/str.h" + +namespace mongo { + + using namespace mongoutils; + + /** helper for manipulating host:port connection endpoints. + */ + struct HostAndPort { + HostAndPort() : _port(-1) { } + + /** From a string hostname[:portnumber] + Throws user assertion if bad config string or bad port #. */ + HostAndPort(string s); + + /** @param p port number. -1 is ok to use default. */ + HostAndPort(string h, int p /*= -1*/) : _host(h), _port(p) { } + + HostAndPort(const SockAddr& sock ) + : _host( sock.getAddr() ) , _port( sock.getPort() ){ + } + + static HostAndPort me() { + return HostAndPort("localhost", cmdLine.port); + } + + /* uses real hostname instead of localhost */ + static HostAndPort Me(); + + bool operator<(const HostAndPort& r) const { + if( _host < r._host ) + return true; + if( _host == r._host ) + return port() < r.port(); + return false; + } + + bool operator==(const HostAndPort& r) const { + return _host == r._host && port() == r.port(); + } + + /* returns true if the host/port combo identifies this process instance. */ + bool isSelf() const; // defined in message.cpp + + bool isLocalHost() const; + + // @returns host:port + string toString() const; + + operator string() const { return toString(); } + + string host() const { return _host; } + + int port() const { return _port >= 0 ? _port : CmdLine::DefaultDBPort; } + bool hasPort() const { return _port >= 0; } + void setPort( int port ) { _port = port; } + + private: + // invariant (except full obj assignment): + string _host; + int _port; // -1 indicates unspecified + }; + + /** returns true if strings seem to be the same hostname. + "nyc1" and "nyc1.acme.com" are treated as the same. + in fact "nyc1.foo.com" and "nyc1.acme.com" are treated the same - + we only look up to the first period. + */ + inline bool sameHostname(const string& a, const string& b) { + return str::before(a, '.') == str::before(b, '.'); + } + + inline HostAndPort HostAndPort::Me() { + string h = getHostName(); + assert( !h.empty() ); + assert( h != "localhost" ); + return HostAndPort(h, cmdLine.port); + } + + inline string HostAndPort::toString() const { + stringstream ss; + ss << _host; + if ( _port != -1 ){ + ss << ':'; +#if defined(_DEBUG) + if( _port >= 44000 && _port < 44100 ) { + log() << "warning: special debug port 44xxx used" << endl; + ss << _port+1; + } + else + ss << _port; +#else + ss << _port; +#endif + } + return ss.str(); + } + + inline bool HostAndPort::isLocalHost() const { + return _host == "localhost" || startsWith(_host.c_str(), "127.") || _host == "::1"; + } + + inline HostAndPort::HostAndPort(string s) { + const char *p = s.c_str(); + uassert(13110, "HostAndPort: bad config string", *p); + const char *colon = strrchr(p, ':'); + if( colon ) { + int port = atoi(colon+1); + uassert(13095, "HostAndPort: bad port #", port > 0); + _host = string(p,colon-p); + _port = port; + } + else { + // no port specified. + _host = p; + _port = -1; + } + } + +}
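A minimal standalone sketch of the parse that HostAndPort(string) above implements; the struct and function names are ours. Note the constructor deliberately uses strrchr, so only the text after the last colon is taken as a port:

    #include <cstdlib>
    #include <cstring>
    #include <iostream>
    #include <string>

    struct Parsed { std::string host; int port; };

    Parsed parseHostAndPort( const std::string& s ) {
        Parsed r;
        const char *p = s.c_str();
        const char *colon = strrchr( p, ':' );   // last ':' wins, as above
        if ( colon ) {
            r.host = std::string( p, colon - p );
            r.port = atoi( colon + 1 );
        }
        else {
            r.host = p;                          // no port specified
            r.port = -1;                         // caller applies the default
        }
        return r;
    }

    int main() {
        Parsed a = parseHostAndPort( "nyc1.acme.com:27017" );
        std::cout << a.host << " " << a.port << '\n';   // nyc1.acme.com 27017
        Parsed b = parseHostAndPort( "localhost" );
        std::cout << b.host << " " << b.port << '\n';   // localhost -1
        return 0;
    }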
diff -Nru mongodb-1.4.4/util/httpclient.cpp mongodb-1.6.3/util/httpclient.cpp --- mongodb-1.4.4/util/httpclient.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/httpclient.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,11 +15,11 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "httpclient.h" #include "sock.h" #include "message.h" -#include "builder.h" +#include "../bson/util/builder.h" namespace mongo { @@ -94,15 +94,11 @@ { const char * out = req.c_str(); int toSend = req.size(); - while ( toSend ){ - int did = p.send( out , toSend ); - toSend -= did; - out += did; - } + p.send( out , toSend, "_go" ); } char buf[4096]; - int got = p.recv( buf , 4096 ); + int got = p.unsafe_recv( buf , 4096 ); buf[got] = 0; int rc; @@ -114,19 +110,41 @@ if ( result ) sb << buf; - while ( ( got = p.recv( buf , 4096 ) ) > 0){ + while ( ( got = p.unsafe_recv( buf , 4096 ) ) > 0){ if ( result ) sb << buf; } if ( result ){ - result->_code = rc; - result->_entireResponse = sb.str(); + result->_init( rc , sb.str() ); } return rc; } + void HttpClient::Result::_init( int code , string entire ){ + _code = code; + _entireResponse = entire; + + while ( true ){ + size_t i = entire.find( '\n' ); + if ( i == string::npos ){ + // invalid + break; + } + + string h = entire.substr( 0 , i ); + entire = entire.substr( i + 1 ); + + if ( h.size() && h[h.size()-1] == '\r' ) + h = h.substr( 0 , h.size() - 1 ); + + if ( h.size() == 0 ) + break; + } + + _body = entire; + } } diff -Nru mongodb-1.4.4/util/httpclient.h mongodb-1.6.3/util/httpclient.h --- mongodb-1.4.4/util/httpclient.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/httpclient.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" namespace mongo { @@ -31,9 +31,25 @@ const string& getEntireResponse() const { return _entireResponse; } + + const map<string,string> getHeaders() const { + return _headers; + } + + const string& getBody() const { + return _body; + } + private: + + void _init( int code , string entire ); + + int _code; + string _entireResponse; + + map<string,string> _headers; + string _body; + + friend class HttpClient; };
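The header/body split Result::_init() performs above can be hard to see in diff form, so here is a standalone sketch of the same scan. The key/value capture into the map is our illustration of what _headers would hold; it is not lifted from the patch:

    #include <map>
    #include <string>

    void splitResponse( std::string entire,
                        std::map<std::string,std::string>& headers,
                        std::string& body ) {
        while ( true ) {
            size_t i = entire.find( '\n' );
            if ( i == std::string::npos )
                break;                                // malformed: never saw a blank line

            std::string h = entire.substr( 0 , i );   // peel one header line off the front
            entire = entire.substr( i + 1 );

            if ( h.size() && h[h.size()-1] == '\r' )  // tolerate CRLF line endings
                h = h.substr( 0 , h.size() - 1 );

            if ( h.empty() )
                break;                                // blank line: the body follows

            size_t colon = h.find( ':' );             // "Key: value"
            if ( colon != std::string::npos )
                headers[ h.substr( 0 , colon ) ] = h.substr( colon + 1 ); // value keeps its leading space
        }
        body = entire;
    }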
diff -Nru mongodb-1.4.4/util/log.cpp mongodb-1.6.3/util/log.cpp --- mongodb-1.4.4/util/log.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/log.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,123 @@ +/** @file log.cpp + */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" +#include "assert_util.h" +#include "assert.h" +#include "file.h" +#include +using namespace std; + +#ifndef _WIN32 +#include +#include +#endif + +#include "../db/jsobj.h" + +namespace mongo { + + Nullstream nullstream; + vector<Tee*>* Logstream::globalTees = 0; + + thread_specific_ptr<Logstream> Logstream::tsp; + + class LoggingManager { + public: + LoggingManager() + : _enabled(0) , _file(0) { + } + + void start( const string& lp , bool append ){ + uassert( 10268 , "LoggingManager already started" , ! _enabled ); + _append = append; + + // test path + FILE * test = fopen( lp.c_str() , _append ? "a" : "w" ); + if ( ! test ){ + cout << "can't open [" << lp << "] for log file: " << errnoWithDescription() << endl; + dbexit( EXIT_BADOPTIONS ); + assert( 0 ); + } + fclose( test ); + + _path = lp; + _enabled = 1; + rotate(); + } + + void rotate(){ + if ( ! _enabled ){ + cout << "LoggingManager not enabled" << endl; + return; + } + + if ( _file ){ +#ifdef _WIN32 + cout << "log rotation doesn't work on windows" << endl; + return; +#else + struct tm t; + localtime_r( &_opened , &t ); + + stringstream ss; + ss << _path << "." << terseCurrentTime(false); + string s = ss.str(); + rename( _path.c_str() , s.c_str() ); +#endif + } + + + FILE* tmp = freopen(_path.c_str(), (_append ? "a" : "w"), stdout); + if (!tmp){ + cerr << "can't open: " << _path.c_str() << " for log file" << endl; + dbexit( EXIT_BADOPTIONS ); + assert(0); + } + + Logstream::setLogFile(tmp); // after this point no thread will be using old file + + _file = tmp; + _opened = time(0); + } + + private: + + bool _enabled; + string _path; + bool _append; + + FILE * _file; + time_t _opened; + + } loggingManager; + + void initLogging( const string& lp , bool append ){ + cout << "all output going to: " << lp << endl; + loggingManager.start( lp , append ); + } + + void rotateLogs( int signal ){ + loggingManager.rotate(); + } + + // done *before* static initialization + FILE* Logstream::logfile = stdout; + +}
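A condensed, standalone sketch of the rotation idiom rotate() uses above: rename the live file aside under a timestamped name, then freopen() stdout onto a fresh file of the same name so every subsequent write lands in the new log. Error handling and the Windows limitation noted above are omitted; the function name is ours:

    #include <cstdio>
    #include <ctime>
    #include <sstream>
    #include <string>

    bool rotateLog( const std::string& path ) {
        std::ostringstream ss;                    // timestamped name for the old log
        ss << path << "." << time(0);
        rename( path.c_str(), ss.str().c_str() );

        // re-point stdout at a fresh file; writers never see a closed FILE*
        return freopen( path.c_str(), "w", stdout ) != 0;
    }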
diff -Nru mongodb-1.4.4/util/log.h mongodb-1.6.3/util/log.h --- mongodb-1.4.4/util/log.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/log.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// log.h +// @file log.h /* Copyright 2009 10gen Inc. * @@ -19,12 +19,33 @@ #include <string.h> #include <errno.h> +#include "../bson/util/builder.h" -namespace mongo { +#ifndef _WIN32 +//#include <syslog.h> +#endif - using boost::shared_ptr; +namespace mongo { - // Utility interface for stringifying object only when val() called. + enum LogLevel { LL_DEBUG , LL_INFO , LL_NOTICE , LL_WARNING , LL_ERROR , LL_SEVERE }; + + inline const char * logLevelToString( LogLevel l ){ + switch ( l ){ + case LL_DEBUG: + case LL_INFO: + case LL_NOTICE: + return ""; + case LL_WARNING: + return "warning"; + case LL_ERROR: + return "ERROR"; + case LL_SEVERE: + return "SEVERE"; + default: + return "UNKNOWN"; + } + } + class LazyString { public: virtual ~LazyString() {} @@ -36,17 +57,29 @@ class LazyStringImpl : public LazyString { public: LazyStringImpl( const T &t ) : t_( t ) {} - virtual string val() const { return (string)t_; } + virtual string val() const { return t_.toString(); } private: const T& t_; }; + class Tee { + public: + virtual ~Tee(){} + virtual void write(LogLevel level , const string& str) = 0; + }; + class Nullstream { public: + virtual Nullstream& operator<< (Tee* tee) { + return *this; + } virtual ~Nullstream() {} virtual Nullstream& operator<<(const char *) { return *this; } + virtual Nullstream& operator<<(const string& ) { + return *this; + } virtual Nullstream& operator<<(char *) { return *this; } @@ -111,50 +144,67 @@ virtual Nullstream& operator<< (ios_base& (*hex)(ios_base&)) { return *this; } - virtual void flush(){} + virtual void flush(Tee *t = 0) {} }; extern Nullstream nullstream; -#define LOGIT { ss << x; return *this; } - class Logstream : public Nullstream { static mongo::mutex mutex; static int doneSetup; stringstream ss; + LogLevel logLevel; + static FILE* logfile; + static boost::scoped_ptr<ostream> stream; + static vector<Tee*> * globalTees; public: + + inline static void logLockless( const StringData& s ); + + static void setLogFile(FILE* f){ + scoped_lock lk(mutex); + logfile = f; + } + + static int magicNumber(){ + return 1717; + } - void flush() { - // this ensures things are sane - if ( doneSetup == 1717 ){ - scoped_lock lk(mutex); - cout << ss.str(); - cout.flush(); - } - ss.str(""); + + inline void flush(Tee *t = 0); + + inline Nullstream& setLogLevel(LogLevel l){ + logLevel = l; + return *this; } - Logstream& operator<<(const char *x) LOGIT - Logstream& operator<<(char *x) LOGIT - Logstream& operator<<(char x) LOGIT - Logstream& operator<<(int x) LOGIT - Logstream& operator<<(ExitCode x) LOGIT - Logstream& operator<<(long x) LOGIT - Logstream& operator<<(unsigned long x) LOGIT - Logstream& operator<<(unsigned x) LOGIT - Logstream& operator<<(double x) LOGIT - Logstream& operator<<(void *x) LOGIT - Logstream& operator<<(const void *x) LOGIT - Logstream& operator<<(long long x) LOGIT - Logstream& operator<<(unsigned long long x) LOGIT - Logstream& operator<<(bool x) LOGIT + + /** note these are virtual */ + Logstream& operator<<(const char *x) { ss << x; return *this; } + Logstream& operator<<(const string& x) { ss << x; return *this; } + Logstream& operator<<(char *x) { ss << x; return *this; } + Logstream& operator<<(char x) { ss << x; return *this; } + Logstream& operator<<(int x) { ss << x; return *this; } + Logstream& operator<<(ExitCode x) { ss << x; return *this; } + Logstream& operator<<(long x) { ss << x; return *this; } + Logstream& operator<<(unsigned long x) { ss << x; return *this; } + Logstream& operator<<(unsigned x) { ss << x; return *this; } + Logstream& operator<<(double x) { ss << x; return *this; } + Logstream& operator<<(void *x) { ss << x; return *this; } + Logstream& operator<<(const void *x) { ss << x; return *this; } + Logstream& operator<<(long long x) { ss << x; return *this; } + Logstream& operator<<(unsigned long long x) { ss << x; return *this; } + Logstream& operator<<(bool x) {
ss << x; return *this; } + Logstream& operator<<(const LazyString& x) { ss << x.val(); return *this; } + Nullstream& operator<< (Tee* tee) { + ss << '\n'; + flush(tee); + return *this; + } Logstream& operator<< (ostream& ( *_endl )(ostream&)) { ss << '\n'; - flush(); + flush(0); return *this; } Logstream& operator<< (ios_base& (*_hex)(ios_base&)) { @@ -168,20 +218,29 @@ if ( ! t ) *this << "null"; else - *this << t; + *this << *t; return *this; } Logstream& prolog() { - char now[64]; - time_t_to_String(time(0), now); - now[20] = 0; - ss << now; return *this; } + + void addGlobalTee( Tee * t ){ + if ( ! globalTees ) + globalTees = new vector<Tee*>(); + globalTees->push_back( t ); + } private: static thread_specific_ptr<Logstream> tsp; + Logstream(){ + _init(); + } + void _init(){ + ss.str(""); + logLevel = LL_INFO; + } public: static Logstream& get() { Logstream *p = tsp.get(); @@ -192,6 +251,7 @@ }; extern int logLevel; + extern int tlogLevel; inline Nullstream& out( int level = 0 ) { if ( level > logLevel ) @@ -203,7 +263,7 @@ at the specified level or higher. */ inline void logflush(int level = 0) { if( level > logLevel ) - Logstream::get().flush(); + Logstream::get().flush(0); } /* without prolog */ @@ -213,15 +273,35 @@ return Logstream::get(); } - inline Nullstream& log( int level = 0 ) { + /** logging which we may not want during unit tests runs. + set tlogLevel to -1 to suppress tlog() output in a test program. */ + inline Nullstream& tlog( int level = 0 ) { + if ( level > tlogLevel || level > logLevel ) + return nullstream; + return Logstream::get().prolog(); + } + + inline Nullstream& log( int level ) { if ( level > logLevel ) return nullstream; return Logstream::get().prolog(); } - /* TODOCONCURRENCY */ - inline ostream& stdcout() { - return cout; + inline Nullstream& log( LogLevel l ) { + return Logstream::get().prolog().setLogLevel( l ); + } + + + inline Nullstream& log() { + return Logstream::get().prolog(); + } + + inline Nullstream& error() { + return log( LL_ERROR ); + } + + inline Nullstream& warning() { + return log( LL_WARNING ); } /* default impl returns "" -- mongod overrides */ @@ -242,9 +322,115 @@ void initLogging( const string& logpath , bool append ); void rotateLogs( int signal = 0 ); -#define OUTPUT_ERRNOX(x) "errno:" << x << " " << strerror(x) -#define OUTPUT_ERRNO OUTPUT_ERRNOX(errno) + std::string toUtf8String(const std::wstring& wide); + + inline string errnoWithDescription(int x = errno) { + stringstream s; + s << "errno:" << x << ' '; + +#if defined(_WIN32) + LPTSTR errorText = NULL; + FormatMessage( + FORMAT_MESSAGE_FROM_SYSTEM + |FORMAT_MESSAGE_ALLOCATE_BUFFER + |FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + x, 0, + (LPTSTR) &errorText, // output + 0, // minimum size for output buffer + NULL); + if( errorText ) { + string x = toUtf8String(errorText); + for( string::iterator i = x.begin(); i != x.end(); i++ ) { + if( *i == '\n' || *i == '\r' ) + break; + s << *i; + } + LocalFree(errorText); + } + else + s << strerror(x); + /* + DWORD n = FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, x, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &lpMsgBuf, 0, NULL); + */ +#else + s << strerror(x); +#endif + return s.str(); + } + + /** output the error # and error message with prefix. + handy for use as parm in uassert/massert.
*/ + string errnoWithPrefix( const char * prefix ); + + void Logstream::logLockless( const StringData& s ){ + if ( doneSetup == 1717 ){ + if(fwrite(s.data(), s.size(), 1, logfile)){ + fflush(logfile); + }else{ + int x = errno; + cout << "Failed to write to logfile: " << errnoWithDescription(x) << ": " << s.data() << endl; + } + } + else { + cout << s.data() << endl; + } + } + + void Logstream::flush(Tee *t) { + // this ensures things are sane + if ( doneSetup == 1717 ) { + string msg = ss.str(); + string threadName = getThreadName(); + const char * type = logLevelToString(logLevel); + + int spaceNeeded = msg.size() + 64 + threadName.size(); + int bufSize = 128; + while ( bufSize < spaceNeeded ) + bufSize += 128; + + BufBuilder b(bufSize); + time_t_to_String( time(0) , b.grow(20) ); + if (!threadName.empty()){ + b.appendChar( '[' ); + b.appendStr( threadName , false ); + b.appendChar( ']' ); + b.appendChar( ' ' ); + } + if ( type[0] ){ + b.appendStr( type , false ); + b.appendStr( ": " , false ); + } + b.appendStr( msg ); - string errnostring( const char * prefix = 0 ); + string out( b.buf() , b.len() - 1); + + scoped_lock lk(mutex); + + if( t ) t->write(logLevel,out); + if ( globalTees ){ + for ( unsigned i=0; i<globalTees->size(); i++ ) + (*globalTees)[i]->write(logLevel,out); + } + +#ifndef _WIN32 + //syslog( LOG_INFO , "%s" , cc ); +#endif + if(fwrite(out.data(), out.size(), 1, logfile)){ + fflush(logfile); + }else{ + int x = errno; + cout << "Failed to write to logfile: " << errnoWithDescription(x) << ": " << out << endl; + } + } + _init(); + } } // namespace mongo diff -Nru mongodb-1.4.4/util/lruishmap.h mongodb-1.6.3/util/lruishmap.h --- mongodb-1.4.4/util/lruishmap.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/lruishmap.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/goodies.h" namespace mongo { diff -Nru mongodb-1.4.4/util/md5main.cpp mongodb-1.6.3/util/md5main.cpp --- mongodb-1.4.4/util/md5main.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/md5main.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -40,7 +40,7 @@ 2002-04-13 lpd Splits off main program into a separate file, md5main.c. */ -#include "stdafx.h" +#include "pch.h" #include "md5.h" #include #include diff -Nru mongodb-1.4.4/util/message.cpp mongodb-1.6.3/util/message.cpp --- mongodb-1.4.4/util/message.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/message.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -18,7 +18,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "message.h" #include #include "../util/goodies.h" @@ -26,10 +26,25 @@ #include #include #include "../db/cmdline.h" +#include "../client/dbclient.h" + +#ifndef _WIN32 +#include <sys/resource.h> +#else + +// errno doesn't work for winsock. +#undef errno +#define errno WSAGetLastError() + +#endif namespace mongo { + bool noUnixSocket = false; + bool objcheck = false; + + void checkTicketNumbers(); // if you want trace output: #define mmm(x) @@ -42,54 +57,171 @@ const int portRecvFlags = 0; #endif - /* listener ------------------------------------------------------------------- */ + const Listener* Listener::_timeTracker; - bool Listener::init() { - SockAddr me; - if ( ip.empty() ) - me = SockAddr( port ); - else - me = SockAddr( ip.c_str(), port ); - sock = ::socket(AF_INET, SOCK_STREAM, 0); - if ( sock == INVALID_SOCKET ) { - log() << "ERROR: listen(): invalid socket? " << OUTPUT_ERRNO << endl; - return false; - } - prebindOptions( sock ); - if ( ::bind(sock, (sockaddr *) &me.sa, me.addressSize) != 0 ) { - log() << "listen(): bind() failed " << OUTPUT_ERRNO << " for port: " << port << endl; - closesocket(sock); - return false; + vector<SockAddr> ipToAddrs(const char* ips, int port){ + vector<SockAddr> out; + if (*ips == '\0'){ + out.push_back(SockAddr("0.0.0.0", port)); // IPv4 all + + if (IPv6Enabled()) + out.push_back(SockAddr("::", port)); // IPv6 all +#ifndef _WIN32 + if (!noUnixSocket) + out.push_back(SockAddr(makeUnixSockPath(port).c_str(), port)); // Unix socket +#endif + return out; } - if ( ::listen(sock, 128) != 0 ) { - log() << "listen(): listen() failed " << OUTPUT_ERRNO << endl; - closesocket(sock); - return false; + while(*ips){ + string ip; + const char * comma = strchr(ips, ','); + if (comma){ + ip = string(ips, comma - ips); + ips = comma + 1; + }else{ + ip = string(ips); + ips = ""; + } + + SockAddr sa(ip.c_str(), port); + out.push_back(sa); + +#ifndef _WIN32 + if (!noUnixSocket && (sa.getAddr() == "127.0.0.1" || sa.getAddr() == "0.0.0.0")) // only IPv4 + out.push_back(SockAddr(makeUnixSockPath(port).c_str(), port)); +#endif } - - return true; + return out; + } - void Listener::listen() { + /* listener ------------------------------------------------------------------- */ + + void Listener::initAndListen() { + checkTicketNumbers(); + vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _port); + vector<SOCKET> socks; + SOCKET maxfd = 0; // needed for select() + + for (vector<SockAddr>::iterator it=mine.begin(), end=mine.end(); it != end; ++it){ + SockAddr& me = *it; + + SOCKET sock = ::socket(me.getType(), SOCK_STREAM, 0); + if ( sock == INVALID_SOCKET ) { + log() << "ERROR: listen(): invalid socket? " << errnoWithDescription() << endl; + } + + if (me.getType() == AF_UNIX){ +#if !defined(_WIN32) + if (unlink(me.getAddr().c_str()) == -1){ + int x = errno; + if (x != ENOENT){ + log() << "couldn't unlink socket file " << me << errnoWithDescription(x) << " skipping" << endl; + continue; + } + } +#endif + } else if (me.getType() == AF_INET6) { + // IPv6 can also accept IPv4 connections as mapped addresses (::ffff:127.0.0.1) + // That causes a conflict if we don't set it to IPV6_ONLY + const int one = 1; + setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*) &one, sizeof(one)); + } + + prebindOptions( sock ); + + if ( ::bind(sock, me.raw(), me.addressSize) != 0 ) { + int x = errno; + log() << "listen(): bind() failed " << errnoWithDescription(x) << " for socket: " << me.toString() << endl; + if ( x == EADDRINUSE ) + log() << " addr already in use" << endl; + closesocket(sock); + return; + } + + if ( ::listen(sock, 128) != 0 ) { + log() << "listen(): listen() failed " << errnoWithDescription() << endl; + closesocket(sock); + return; + } + + ListeningSockets::get()->add( sock ); + + socks.push_back(sock); + if (sock > maxfd) + maxfd = sock; + } + static long connNumber = 0; - SockAddr from; + struct timeval maxSelectTime; while ( !
inShutdown() ) { - int s = accept(sock, (sockaddr *) &from.sa, &from.addressSize); - if ( s < 0 ) { - if ( errno == ECONNABORTED || errno == EBADF ) { - log() << "Listener on port " << port << " aborted" << endl; - return; - } - log() << "Listener: accept() returns " << s << " " << OUTPUT_ERRNO << endl; + fd_set fds[1]; + FD_ZERO(fds); + + for (vector<SOCKET>::iterator it=socks.begin(), end=socks.end(); it != end; ++it){ + FD_SET(*it, fds); + } + + maxSelectTime.tv_sec = 0; + maxSelectTime.tv_usec = 10000; + const int ret = select(maxfd+1, fds, NULL, NULL, &maxSelectTime); + + if (ret == 0){ +#if defined(__linux__) + _elapsedTime += ( 10000 - maxSelectTime.tv_usec ) / 1000; +#else + _elapsedTime += 10; +#endif continue; } - disableNagle(s); - if ( ! cmdLine.quiet ) log() << "connection accepted from " << from.toString() << " #" << ++connNumber << endl; - accepted( new MessagingPort(s, from) ); + _elapsedTime += ret; // assume 1ms to grab connection. very rough + + if (ret < 0){ + int x = errno; +#ifdef EINTR + if ( x == EINTR ){ + log() << "select() signal caught, continuing" << endl; + continue; + } +#endif + if ( ! inShutdown() ) + log() << "select() failure: ret=" << ret << " " << errnoWithDescription(x) << endl; + return; + } + + for (vector<SOCKET>::iterator it=socks.begin(), end=socks.end(); it != end; ++it){ + if (! (FD_ISSET(*it, fds))) + continue; + + SockAddr from; + int s = accept(*it, from.raw(), &from.addressSize); + if ( s < 0 ) { + int x = errno; // so no global issues + if ( x == ECONNABORTED || x == EBADF ) { + log() << "Listener on port " << _port << " aborted" << endl; + return; + } + if ( x == 0 && inShutdown() ) { + return; // socket closed + } + if( !inShutdown() ) + log() << "Listener: accept() returns " << s << " " << errnoWithDescription(x) << endl; + continue; + } + if (from.getType() != AF_UNIX) + disableNagle(s); + if ( _logConnect && ! cmdLine.quiet ) + log() << "connection accepted from " << from.toString() << " #" << ++connNumber << endl; + accepted(s, from); + } } } + void Listener::accepted(int sock, const SockAddr& from){ + accepted( new MessagingPort(sock, from) ); + }
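Stripped of logging and elapsed-time bookkeeping, the accept loop above reduces to the classic select() pattern: poll all listening fds with a short timeout, then accept on whichever is ready. A standalone POSIX sketch (names ours):

    #include <sys/select.h>
    #include <sys/socket.h>
    #include <vector>

    void acceptLoop( const std::vector<int>& socks, int maxfd, volatile bool& shuttingDown ) {
        while ( !shuttingDown ) {
            fd_set fds;
            FD_ZERO(&fds);
            for ( size_t i = 0; i < socks.size(); i++ )
                FD_SET(socks[i], &fds);

            timeval tv = { 0, 10000 };                 // cap the wait at 10ms, as above
            int ret = select(maxfd + 1, &fds, NULL, NULL, &tv);
            if ( ret <= 0 )
                continue;                              // timeout or signal: loop and re-check

            for ( size_t i = 0; i < socks.size(); i++ ) {
                if ( !FD_ISSET(socks[i], &fds) )
                    continue;
                int conn = accept(socks[i], NULL, NULL);
                if ( conn >= 0 ) {
                    // hand conn off to a worker, as accepted() does above
                }
            }
        }
    }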
+ /* messagingport -------------------------------------------------------------- */ class PiggyBackData { @@ -101,52 +233,50 @@ } ~PiggyBackData() { - flush(); - delete( _cur ); + DESTRUCTOR_GUARD ( + flush(); + delete[]( _cur ); + ); } void append( Message& m ) { - assert( m.data->len <= 1300 ); + assert( m.header()->len <= 1300 ); - if ( len() + m.data->len > 1300 ) + if ( len() + m.header()->len > 1300 ) flush(); - memcpy( _cur , m.data , m.data->len ); - _cur += m.data->len; + memcpy( _cur , m.singleData() , m.header()->len ); + _cur += m.header()->len; } - int flush() { + void flush() { if ( _buf == _cur ) - return 0; + return; - int x = _port->send( _buf , len() ); + _port->send( _buf , len(), "flush" ); _cur = _buf; - return x; } - int len() { - return _cur - _buf; - } + int len() const { return _cur - _buf; } private: - MessagingPort* _port; - char * _buf; char * _cur; }; class Ports { - set<MessagingPort*>& ports; + set<MessagingPort*> ports; mongo::mutex m; public: - // we "new" this so it is still be around when other automatic global vars - are being destructed during termination. - Ports() : ports( *(new set<MessagingPort*>()) ) {} - void closeAll() { \ + Ports() : ports(), m("Ports") {} + void closeAll(unsigned skip_mask) { scoped_lock bl(m); - for ( set<MessagingPort*>::iterator i = ports.begin(); i != ports.end(); i++ ) + for ( set<MessagingPort*>::iterator i = ports.begin(); i != ports.end(); i++ ) { + if( (*i)->tag & skip_mask ) + continue; (*i)->shutdown(); + } } void insert(MessagingPort* p) { scoped_lock bl(m); @@ -156,22 +286,27 @@ scoped_lock bl(m); ports.erase(p); } - } ports; - + }; + // we "new" this so it is still around when other automatic global vars + // are being destructed during termination. + Ports& ports = *(new Ports()); - void closeAllSockets() { - ports.closeAll(); + void MessagingPort::closeAllSockets(unsigned mask) { + ports.closeAll(mask); } - MessagingPort::MessagingPort(int _sock, SockAddr& _far) : sock(_sock), piggyBackData(0), farEnd(_far) { + MessagingPort::MessagingPort(int _sock, const SockAddr& _far) : sock(_sock), piggyBackData(0), farEnd(_far), _timeout(), tag(0) { + _logLevel = 0; ports.insert(this); } - MessagingPort::MessagingPort() { + MessagingPort::MessagingPort( int timeout, int ll ) : tag(0) { + _logLevel = ll; ports.insert(this); sock = -1; piggyBackData = 0; + _timeout = timeout; } void MessagingPort::shutdown() { @@ -194,42 +329,30 @@ int res; SockAddr farEnd; void run() { - res = ::connect(sock, (sockaddr *) &farEnd.sa, farEnd.addressSize); + res = ::connect(sock, farEnd.raw(), farEnd.addressSize); } + string name() { return "ConnectBG"; } }; bool MessagingPort::connect(SockAddr& _far) { farEnd = _far; - sock = socket(AF_INET, SOCK_STREAM, 0); + sock = socket(farEnd.getType(), SOCK_STREAM, 0); if ( sock == INVALID_SOCKET ) { - log() << "ERROR: connect(): invalid socket? " << OUTPUT_ERRNO << endl; + log(_logLevel) << "ERROR: connect invalid socket " << errnoWithDescription() << endl; return false; } -#if 0 - long fl = fcntl(sock, F_GETFL, 0); - assert( fl >= 0 ); - fl |= O_NONBLOCK; - fcntl(sock, F_SETFL, fl); - - int res = ::connect(sock, (sockaddr *) &farEnd.sa, farEnd.addressSize); - if ( res ) { - if ( errno == EINPROGRESS ) - closesocket(sock); - sock = -1; - return false; + if ( _timeout > 0 ) { + setSockTimeouts( sock, _timeout ); } - -#endif - + ConnectBG bg; bg.sock = sock; bg.farEnd = farEnd; bg.go(); - // int res = ::connect(sock, (sockaddr *) &farEnd.sa, farEnd.addressSize); if ( bg.wait(5000) ) { if ( bg.res ) { closesocket(sock); @@ -245,7 +368,8 @@ return false; } - disableNagle(sock); + if (farEnd.getType() != AF_UNIX) + disableNagle(sock); #ifdef SO_NOSIGPIPE // osx @@ -257,94 +381,65 @@ } bool MessagingPort::recv(Message& m) { -again: - mmm( out() << "* recv() sock:" << this->sock << endl; ) - int len = -1; - - char *lenbuf = (char *) &len; - int lft = 4; - while ( 1 ) { - int x = recv( lenbuf, lft ); - if ( x == 0 ) { - DEV out() << "MessagingPort recv() conn closed? " << farEnd.toString() << endl; - m.reset(); - return false; - } - if ( x < 0 ) { - log() << "MessagingPort recv() " << OUTPUT_ERRNO << " " << farEnd.toString()<<endl; - m.reset(); - return false; - } - lft -= x; - lenbuf += x; - if ( lft == 0 ) - break; - } + try { +again: + mmm( log() << "* recv() sock:" << this->sock << endl; ) + int len = -1; + + char *lenbuf = (char *) &len; + int lft = 4; + recv( lenbuf, lft ); + + if ( len < 16 || len > 16000000 ) { // messages must be large enough for headers + if ( len == -1 ) { + // Endian check from the database, after connecting, to see what mode server is running in. + unsigned foo = 0x10203040; + send( (char *) &foo, 4, "endian" ); + goto again; + } + + if ( len == 542393671 ){ + // an http GET + log(_logLevel) << "looks like you're trying to access db over http on native driver port.
please add 1000 for webserver" << endl; + string msg = "You are trying to access MongoDB on the native driver port. For http diagnostic access, add 1000 to the port number\n"; + stringstream ss; + ss << "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: text/plain\r\nContent-Length: " << msg.size() << "\r\n\r\n" << msg; + string s = ss.str(); + send( s.c_str(), s.size(), "http" ); return false; } - goto again; - } - - if ( len == 542393671 ){ - // an http GET - log() << "looks like you're trying to access db over http on native driver port. please add 1000 for webserver" << endl; - string msg = "You are trying to access MongoDB on the native driver port. For http diagnostic access, add 1000 to the port number\n"; - stringstream ss; - ss << "HTTP/1.0 200 OK\r\nConnection: close\r\nContent-Type: text/plain\r\nContent-Length: " << msg.size() << "\r\n\r\n" << msg; - string s = ss.str(); - send( s.c_str(), s.size() ); + log(_logLevel) << "bad recv() len: " << len << '\n'; return false; } - log() << "bad recv() len: " << len << '\n'; - return false; - } - - int z = (len+1023)&0xfffffc00; - assert(z>=len); - MsgData *md = (MsgData *) malloc(z); - md->len = len; - - if ( len <= 0 ) { - out() << "got a length of " << len << ", something is wrong" << endl; - return false; - } - - char *p = (char *) &md->id; - int left = len -4; - while ( 1 ) { - int x = recv( p, left ); - if ( x == 0 ) { - DEV out() << "MessagingPort::recv(): conn closed? " << farEnd.toString() << endl; - m.reset(); - return false; + + int z = (len+1023)&0xfffffc00; + assert(z>=len); + MsgData *md = (MsgData *) malloc(z); + assert(md); + md->len = len; + + char *p = (char *) &md->id; + int left = len -4; + + try { + recv( p, left ); + } catch (...) { + free(md); + throw; } - if ( x < 0 ) { - log() << "MessagingPort recv() " << OUTPUT_ERRNO << ' ' << farEnd.toString() << endl; - m.reset(); - return false; - } - left -= x; - p += x; - if ( left <= 0 ) - break; + + m.setData(md, true); + return true; + + } catch ( const SocketException & e ) { + log(_logLevel + (e.shouldPrint() ? 
0 : 1) ) << "SocketException: " << e << endl; + m.reset(); + return false; } - - m.setData(md, true); - return true; } - + void MessagingPort::reply(Message& received, Message& response) { - say(/*received.from, */response, received.data->id); + say(/*received.from, */response, received.header()->id); } void MessagingPort::reply(Message& received, Message& response, MSGID responseTo) { @@ -352,79 +447,171 @@ } bool MessagingPort::call(Message& toSend, Message& response) { - mmm( out() << "*call()" << endl; ) - MSGID old = toSend.data->id; + mmm( log() << "*call()" << endl; ) + MSGID old = toSend.header()->id; say(/*to,*/ toSend); while ( 1 ) { bool ok = recv(response); if ( !ok ) return false; - //out() << "got response: " << response.data->responseTo << endl; - if ( response.data->responseTo == toSend.data->id ) + //log() << "got response: " << response.data->responseTo << endl; + if ( response.header()->responseTo == toSend.header()->id ) break; - out() << "********************" << endl; - out() << "ERROR: MessagingPort::call() wrong id got:" << (unsigned)response.data->responseTo << " expect:" << (unsigned)toSend.data->id << endl; - out() << " toSend op: " << toSend.data->operation() << " old id:" << (unsigned)old << endl; - out() << " response msgid:" << (unsigned)response.data->id << endl; - out() << " response len: " << (unsigned)response.data->len << endl; - out() << " response op: " << response.data->operation() << endl; - out() << " farEnd: " << farEnd << endl; + log() << "********************" << endl; + log() << "ERROR: MessagingPort::call() wrong id got:" << hex << (unsigned)response.header()->responseTo << " expect:" << (unsigned)toSend.header()->id << endl; + log() << " toSend op: " << toSend.operation() << " old id:" << (unsigned)old << endl; + log() << " response msgid:" << (unsigned)response.header()->id << endl; + log() << " response len: " << (unsigned)response.header()->len << endl; + log() << " response op: " << response.operation() << endl; + log() << " farEnd: " << farEnd << endl; assert(false); response.reset(); } - mmm( out() << "*call() end" << endl; ) + mmm( log() << "*call() end" << endl; ) return true; } void MessagingPort::say(Message& toSend, int responseTo) { - assert( toSend.data ); - mmm( out() << "* say() sock:" << this->sock << " thr:" << GetCurrentThreadId() << endl; ) - toSend.data->id = nextMessageId(); - toSend.data->responseTo = responseTo; - - int x = -100; + assert( !toSend.empty() ); + mmm( log() << "* say() sock:" << this->sock << " thr:" << GetCurrentThreadId() << endl; ) + toSend.header()->id = nextMessageId(); + toSend.header()->responseTo = responseTo; if ( piggyBackData && piggyBackData->len() ) { - mmm( out() << "* have piggy back" << endl; ) - if ( ( piggyBackData->len() + toSend.data->len ) > 1300 ) { + mmm( log() << "* have piggy back" << endl; ) + if ( ( piggyBackData->len() + toSend.header()->len ) > 1300 ) { // won't fit in a packet - so just send it off piggyBackData->flush(); } else { piggyBackData->append( toSend ); - x = piggyBackData->flush(); + piggyBackData->flush(); + return; } } - if ( x == -100 ) - x = send( (char*)toSend.data, toSend.data->len ); - - if ( x <= 0 ) { - log() << "MessagingPort say send() " << OUTPUT_ERRNO << ' ' << farEnd.toString() << endl; - throw SocketException(); - } - + toSend.send( *this, "say" ); } - int MessagingPort::send( const char * data , const int len ){ - return ::send( sock , data , len , portSendFlags ); + // sends all data or throws an exception + void MessagingPort::send( const char * 
data , int len, const char *context ) { + while( len > 0 ) { + int ret = ::send( sock , data , len , portSendFlags ); + if ( ret == -1 ) { + if ( errno != EAGAIN || _timeout == 0 ) { + log(_logLevel) << "MessagingPort " << context << " send() " << errnoWithDescription() << ' ' << farEnd.toString() << endl; + throw SocketException( SocketException::SEND_ERROR ); + } else { + if ( !serverAlive( farEnd.toString() ) ) { + log(_logLevel) << "MessagingPort " << context << " send() remote dead " << farEnd.toString() << endl; + throw SocketException( SocketException::SEND_ERROR ); + } + } + } else { + assert( ret <= len ); + len -= ret; + data += ret; + } + } + } - int MessagingPort::recv( char * buf , int max ){ - return ::recv( sock , buf , max , portRecvFlags ); + // sends all data or throws an exception + void MessagingPort::send( const vector< pair< char *, int > > &data, const char *context ){ +#if defined(_WIN32) + // TODO use scatter/gather api + for( vector< pair< char *, int > >::const_iterator i = data.begin(); i != data.end(); ++i ) { + char * data = i->first; + int len = i->second; + send( data, len, context ); + } +#else + vector< struct iovec > d( data.size() ); + int i = 0; + for( vector< pair< char *, int > >::const_iterator j = data.begin(); j != data.end(); ++j ) { + if ( j->second > 0 ) { + d[ i ].iov_base = j->first; + d[ i ].iov_len = j->second; + ++i; + } + } + struct msghdr meta; + memset( &meta, 0, sizeof( meta ) ); + meta.msg_iov = &d[ 0 ]; + meta.msg_iovlen = d.size(); + + while( meta.msg_iovlen > 0 ) { + int ret = ::sendmsg( sock , &meta , portSendFlags ); + if ( ret == -1 ) { + if ( errno != EAGAIN || _timeout == 0 ) { + log(_logLevel) << "MessagingPort " << context << " send() " << errnoWithDescription() << ' ' << farEnd.toString() << endl; + throw SocketException( SocketException::SEND_ERROR ); + } else { + if ( !serverAlive( farEnd.toString() ) ) { + log(_logLevel) << "MessagingPort " << context << " send() remote dead " << farEnd.toString() << endl; + throw SocketException( SocketException::SEND_ERROR ); + } + } + } else { + struct iovec *& i = meta.msg_iov; + while( ret > 0 ) { + if ( i->iov_len > unsigned( ret ) ) { + i->iov_len -= ret; + i->iov_base = (char*)(i->iov_base) + ret; + ret = 0; + } else { + ret -= i->iov_len; + ++i; + --(meta.msg_iovlen); + } + } + } + } +#endif } + void MessagingPort::recv( char * buf , int len ){ + while( len > 0 ) { + int ret = ::recv( sock , buf , len , portRecvFlags ); + if ( ret == 0 ) { + log(3) << "MessagingPort recv() conn closed? " << farEnd.toString() << endl; + throw SocketException( SocketException::CLOSED ); + } + if ( ret == -1 ) { + int e = errno; + if ( e != EAGAIN || _timeout == 0 ) { + log(_logLevel) << "MessagingPort recv() " << errnoWithDescription(e) << " " << farEnd.toString() << endl; + throw SocketException( SocketException::RECV_ERROR ); + } else { + if ( !serverAlive( farEnd.toString() ) ) { + log(_logLevel) << "MessagingPort recv() remote dead " << farEnd.toString() << endl; + throw SocketException( SocketException::RECV_ERROR ); + } + } + } else { + len -= ret; + buf += ret; + } + } + } + + int MessagingPort::unsafe_recv( char *buf, int max ){ + return ::recv( sock , buf , max , portRecvFlags ); + } + + void MessagingPort::piggyBack( Message& toSend , int responseTo ) { + if ( toSend.header()->len > 1300 ) { + // not worth saving because its almost an entire packet + say( toSend ); + return; + } + + // we're going to be storing this, so need to set it up + toSend.header()->id = nextMessageId(); + toSend.header()->responseTo = responseTo; + + if ( ! piggyBackData ) + piggyBackData = new PiggyBackData( this ); + + piggyBackData->append( toSend ); + } + + unsigned MessagingPort::remotePort() const { + return farEnd.getPort(); + } + + HostAndPort MessagingPort::remote() const { + return farEnd; + } + + int getMaxConnections(){ +#ifdef _WIN32 + return 20000; +#else + struct rlimit limit; + assert( getrlimit(RLIMIT_NOFILE,&limit) == 0 ); + + int max = (int)(limit.rlim_cur * .8); + if ( max > 20000 ) + max = 20000; + + return max; +#endif + } + + void checkTicketNumbers(){ + connTicketHolder.resize( getMaxConnections() ); + } + + TicketHolder connTicketHolder(20000); + + namespace { + map<string, bool> isSelfCache; // host, isSelf + } + + bool HostAndPort::isSelf() const { + int p = _port == -1 ?
CmdLine::DefaultDBPort : _port; + + if( p != cmdLine.port ){ + return false; + } else if (sameHostname(getHostName(), _host) || isLocalHost()) { + return true; + } else { + map<string, bool>::const_iterator it = isSelfCache.find(_host); + if (it != isSelfCache.end()){ + return it->second; + } + + SockAddr addr (_host.c_str(), 0); // port 0 is dynamically assigned + SOCKET sock = ::socket(addr.getType(), SOCK_STREAM, 0); + assert(sock != INVALID_SOCKET); + + bool ret = (::bind(sock, addr.raw(), addr.addressSize) == 0); + isSelfCache[_host] = ret; + + closesocket(sock); + + return ret; + } + } + } // namespace mongo
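The interesting trick in isSelf() above is deciding "is this address mine?" without connecting anywhere: binding a socket to the address (with port 0, so the OS hands out any free port and the probe never collides with a listener) succeeds only for addresses that belong to this machine. A standalone POSIX sketch of the same probe, names ours:

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    bool addressIsLocal( const sockaddr_in& addr ) {
        int sock = ::socket(AF_INET, SOCK_STREAM, 0);
        if ( sock < 0 )
            return false;

        sockaddr_in probe = addr;
        probe.sin_port = 0;                 // dynamically assigned port
        bool ok = ::bind(sock, (sockaddr*)&probe, sizeof(probe)) == 0;
        ::close(sock);
        return ok;                          // bind succeeds only for local addresses
    }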
diff -Nru mongodb-1.4.4/util/message.h mongodb-1.6.3/util/message.h --- mongodb-1.4.4/util/message.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/message.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// message.h +// Message.h /* Copyright 2009 10gen Inc. * @@ -18,44 +18,88 @@ #pragma once #include "../util/sock.h" -#include "../util/atomic_int.h" +#include "../bson/util/atomic_int.h" +#include "hostandport.h" namespace mongo { + extern bool noUnixSocket; + class Message; class MessagingPort; class PiggyBackData; typedef AtomicUInt MSGID; - class Listener { + class Listener : boost::noncopyable { public: - Listener(const string &_ip, int p) : ip(_ip), port(p) { } - virtual ~Listener() {} - bool init(); // set up socket - int socket() const { return sock; } - void listen(); // never returns (start a thread) + Listener(const string &ip, int p, bool logConnect=true ) : _port(p), _ip(ip), _logConnect(logConnect), _elapsedTime(0){ } + virtual ~Listener() { + if ( _timeTracker == this ) + _timeTracker = 0; + } + void initAndListen(); // never returns unless error (start a thread) /* spawn a thread, etc., then return */ - virtual void accepted(MessagingPort *mp) = 0; + virtual void accepted(int sock, const SockAddr& from); + virtual void accepted(MessagingPort *mp){ + assert(!"You must overwrite one of the accepted methods"); + } + + const int _port; + + /** + * @return a rough estimate of elapsed time since the server started + */ + long long getMyElapsedTimeMillis() const { return _elapsedTime; } + + void setAsTimeTracker(){ + _timeTracker = this; + } + + static const Listener* getTimeTracker(){ + return _timeTracker; + } + + static long long getElapsedTimeMillis() { + if ( _timeTracker ) + return _timeTracker->getMyElapsedTimeMillis(); + return 0; + } + private: - string ip; - int port; - int sock; + string _ip; + bool _logConnect; + long long _elapsedTime; + + static const Listener* _timeTracker; }; - class AbstractMessagingPort { + class AbstractMessagingPort : boost::noncopyable { public: virtual ~AbstractMessagingPort() { } virtual void reply(Message& received, Message& response, MSGID responseTo) = 0; // like the reply below, but doesn't rely on received.data still being available virtual void reply(Message& received, Message& response) = 0; - virtual unsigned remotePort() = 0 ; + virtual HostAndPort remote() const = 0; + virtual unsigned remotePort() const = 0; + + virtual int getClientId(){ + int x = remotePort(); + x = x << 16; + x |= ( ( 0xFF0 & (long long)this ) >> 8 ); // lowest byte in pointer often meaningless + return x; + } }; class MessagingPort : public AbstractMessagingPort { public: - MessagingPort(int sock, SockAddr& farEnd); - MessagingPort(); + MessagingPort(int sock, const SockAddr& farEnd); + + // in some cases the timeout will actually be 2x this value - eg we do a partial send, + // then the timeout fires, then we try to send again, then the timeout fires again with + // no data sent, then we detect that the other side is down + MessagingPort(int timeout = 0, int logLevel = 0 ); + virtual ~MessagingPort(); void shutdown(); @@ -73,22 +117,33 @@ void piggyBack( Message& toSend , int responseTo = -1 ); - virtual unsigned remotePort(); + virtual unsigned remotePort() const; + virtual HostAndPort remote() const; + + // send len or throw SocketException + void send( const char * data , int len, const char *context ); + void send( const vector< pair< char *, int > > &data, const char *context ); - int send( const char * data , const int len ); - int recv( char * data , int max ); + // recv len or throw SocketException + void recv( char * data , int len ); + + int unsafe_recv( char *buf, int max ); private: int sock; PiggyBackData * piggyBackData; public: SockAddr farEnd; + int _timeout; + int _logLevel; // passed to log() when logging errors + + static void closeAllSockets(unsigned tagMask = 0xffffffff); + + /* ports can be tagged with various classes. see closeAllSockets(tag). defaults to 0. */ + unsigned tag; friend class PiggyBackData; }; - //#pragma pack() -#pragma pack(1) - enum Operations { opReply = 1, /* reply. responseTo is set. */ dbMsg = 1000, /* generic msg command followed by a string */ @@ -120,7 +175,52 @@ return ""; } } + + inline bool opIsWrite( int op ){ + switch ( op ){ + + case 0: + case opReply: + case dbMsg: + case dbQuery: + case dbGetMore: + case dbKillCursors: + return false; + + case dbUpdate: + case dbInsert: + case dbDelete: + return true; + + default: + PRINT(op); + assert(0); + return false; + } + + } +#pragma pack(1) +/* see http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol +*/ +struct MSGHEADER { + int messageLength; // total message size, including this + int requestID; // identifier for this message + int responseTo; // requestID from the original request + // (used in responses from db) + int opCode; +}; +struct OP_GETMORE : public MSGHEADER { + MSGHEADER header; // standard message header + int ZERO_or_flags; // 0 - reserved for future use + //cstring fullCollectionName; // "dbname.collectionname" + //int32 numberToReturn; // number of documents to return + //int64 cursorID; // cursorID from the OP_REPLY +}; +#pragma pack() + +#pragma pack(1) + /* todo merge this with MSGHEADER (or inherit from it). */ struct MsgData { int len; /* len of the msg, including this field */ MSGID id; /* request/reply id's match... */ @@ -146,88 +246,232 @@ return true; } + long long getCursor(){ + assert( responseTo > 0 ); + assert( _operation == opReply ); + long long * l = (long long *)(_data + 4); + return l[0]; + } + int dataLen(); // len without header }; const int MsgDataHeaderSize = sizeof(MsgData) - 4; inline int MsgData::dataLen() { return len - MsgDataHeaderSize; } - #pragma pack() class Message { public: - Message() { - data = 0; - freeIt = false; - } - Message( void * _data , bool _freeIt ) { - data = (MsgData*)_data; - freeIt = _freeIt; + // we assume here that a vector with initial size 0 does no allocation (0 is the default, but wanted to make it explicit).
+ Message() : _buf( 0 ), _data( 0 ), _freeIt( false ) {} + Message( void * data , bool freeIt ) : + _buf( 0 ), _data( 0 ), _freeIt( false ) { + _setData( reinterpret_cast< MsgData* >( data ), freeIt ); }; + Message(Message& r) : _buf( 0 ), _data( 0 ), _freeIt( false ) { + *this = r; + } ~Message() { reset(); } - SockAddr from; - MsgData *data; + SockAddr _from; - int operation() const { - return data->operation(); + MsgData *header() const { + assert( !empty() ); + return _buf ? _buf : reinterpret_cast< MsgData* > ( _data[ 0 ].first ); } - + int operation() const { return header()->operation(); } + + MsgData *singleData() const { + massert( 13273, "single data buffer expected", _buf ); + return header(); + } + + bool empty() const { return !_buf && _data.empty(); } + + int size() const{ + int res = 0; + if ( _buf ){ + res = _buf->len; + } else { + for (MsgVec::const_iterator it = _data.begin(); it != _data.end(); ++it){ + res += it->second; + } + } + return res; + } + + // concat multiple buffers - noop if <2 buffers already, otherwise can be expensive copy + // can get rid of this if we make response handling smarter + void concat() { + if ( _buf || empty() ) { + return; + } + + assert( _freeIt ); + int totalSize = 0; + for( vector< pair< char *, int > >::const_iterator i = _data.begin(); i != _data.end(); ++i ) { + totalSize += i->second; + } + char *buf = (char*)malloc( totalSize ); + char *p = buf; + for( vector< pair< char *, int > >::const_iterator i = _data.begin(); i != _data.end(); ++i ) { + memcpy( p, i->first, i->second ); + p += i->second; + } + reset(); + _setData( (MsgData*)buf, true ); + } + + // vector swap() so this is fast Message& operator=(Message& r) { - assert( data == 0 ); - data = r.data; - assert( r.freeIt ); - r.freeIt = false; - r.data = 0; - freeIt = true; + assert( empty() ); + assert( r._freeIt ); + _buf = r._buf; + r._buf = 0; + if ( r._data.size() > 0 ) { + _data.swap( r._data ); + } + r._freeIt = false; + _freeIt = true; return *this; } void reset() { - if ( freeIt && data ) - free(data); - data = 0; - freeIt = false; + if ( _freeIt ) { + if ( _buf ) { + free( _buf ); + } + for( vector< pair< char *, int > >::const_iterator i = _data.begin(); i != _data.end(); ++i ) { + free(i->first); + } + } + _buf = 0; + _data.clear(); + _freeIt = false; + } + + // use to add a buffer + // assumes message will free everything + void appendData(char *d, int size) { + if ( size <= 0 ) { + return; + } + if ( empty() ) { + MsgData *md = (MsgData*)d; + md->len = size; // can be updated later if more buffers added + _setData( md, true ); + return; + } + assert( _freeIt ); + if ( _buf ) { + _data.push_back( make_pair( (char*)_buf, _buf->len ) ); + _buf = 0; + } + _data.push_back( make_pair( d, size ) ); + header()->len += size; } - - void setData(MsgData *d, bool _freeIt) { - assert( data == 0 ); - freeIt = _freeIt; - data = d; + + // use to set first buffer if empty + void setData(MsgData *d, bool freeIt) { + assert( empty() ); + _setData( d, freeIt ); } void setData(int operation, const char *msgtxt) { setData(operation, msgtxt, strlen(msgtxt)+1); } void setData(int operation, const char *msgdata, size_t len) { - assert(data == 0); + assert( empty() ); size_t dataLen = len + sizeof(MsgData) - 4; MsgData *d = (MsgData *) malloc(dataLen); memcpy(d->_data, msgdata, len); d->len = fixEndian(dataLen); d->setOperation(operation); - freeIt= true; - data = d; + _setData( d, true ); } bool doIFreeIt() { - return freeIt; + return _freeIt; + } + + void send( MessagingPort &p, const char 
*context ) { + if ( empty() ) { + return; + } + if ( _buf != 0 ) { + p.send( (char*)_buf, _buf->len, context ); + } else { + p.send( _data, context ); + } } private: - bool freeIt; + void _setData( MsgData *d, bool freeIt ) { + _freeIt = freeIt; + _buf = d; + } + // if just one buffer, keep it in _buf, otherwise keep a sequence of buffers in _data + MsgData * _buf; + // byte buffer(s) - the first must contain at least a full MsgData unless using _buf for storage instead + typedef vector< pair< char*, int > > MsgVec; + MsgVec _data; + bool _freeIt; }; class SocketException : public DBException { public: - virtual const char* what() const throw() { return "socket exception"; } - virtual int getCode(){ return 9001; } + enum Type { CLOSED , RECV_ERROR , SEND_ERROR } type; + SocketException( Type t ) : DBException( "socket exception" , 9001 ) , type(t){} + + bool shouldPrint() const { + return type != CLOSED; + } + + }; MSGID nextMessageId(); void setClientId( int id ); int getClientId(); + + extern TicketHolder connTicketHolder; + + class ElapsedTracker { + public: + ElapsedTracker( int hitsBetweenMarks , int msBetweenMarks ) + : _h( hitsBetweenMarks ) , _ms( msBetweenMarks ) , _pings(0){ + _last = Listener::getElapsedTimeMillis(); + } + + /** + * call this for every iteration + * returns true if one of the triggers has gone off + */ + bool ping(){ + if ( ( ++_pings % _h ) == 0 ){ + _last = Listener::getElapsedTimeMillis(); + return true; + } + + long long now = Listener::getElapsedTimeMillis(); + if ( now - _last > _ms ){ + _last = now; + return true; + } + + return false; + } + + private: + int _h; + int _ms; + + unsigned long long _pings; + + long long _last; + + }; + } // namespace mongo
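A usage sketch for ElapsedTracker above; the surrounding loop and include path are illustrative. ping() fires on whichever trigger comes first, every N calls or roughly M milliseconds, and reads time from the Listener's cached elapsed-time counter rather than making a clock syscall on the hot path:

    #include "util/message.h"   // ElapsedTracker / Listener, per this tree

    void scanLoop() {
        mongo::ElapsedTracker tracker( 128 /*hits between marks*/ , 500 /*ms between marks*/ );
        for ( long long i = 0; i < 10000000; i++ ) {
            // ... one unit of work ...
            if ( tracker.ping() ) {
                // entered at most once per 128 iterations or ~500ms --
                // a cheap place to check for interrupt or report progress
            }
        }
    }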
diff -Nru mongodb-1.4.4/util/message_server_asio.cpp mongodb-1.6.3/util/message_server_asio.cpp --- mongodb-1.4.4/util/message_server_asio.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/message_server_asio.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -27,7 +27,7 @@ #include "message.h" #include "message_server.h" -#include "../util/mvar.h" +#include "../util/concurrency/mvar.h" using namespace boost; using namespace boost::asio; @@ -204,9 +204,11 @@ class AsyncMessageServer : public MessageServer { public: - AsyncMessageServer( int port , MessageHandler * handler ) - : MessageServer( port , handler ) - , _endpoint( tcp::v4() , port ) + // TODO accept an IP address to bind to + AsyncMessageServer( const MessageServer::Options& opts , MessageHandler * handler ) + : _port( opts.port ) + , _handler(handler) + , _endpoint( tcp::v4() , opts.port ) , _acceptor( _ioservice , _endpoint ) { _accept(); @@ -232,7 +234,7 @@ _accept(); } - void _accept(){ + void _accept( ){ shared_ptr<MessageServerSession> session( new MessageServerSession( _handler , _ioservice ) ); _acceptor.async_accept( session->socket() , boost::bind( &AsyncMessageServer::handleAccept, @@ -243,13 +245,15 @@ } private: + int _port; + MessageHandler * _handler; io_service _ioservice; tcp::endpoint _endpoint; tcp::acceptor _acceptor; }; - MessageServer * createServer( int port , MessageHandler * handler ){ - return new AsyncMessageServer( port , handler ); + MessageServer * createServer( const MessageServer::Options& opts , MessageHandler * handler ){ + return new AsyncMessageServer( opts , handler ); } } diff -Nru mongodb-1.4.4/util/message_server.h mongodb-1.6.3/util/message_server.h --- mongodb-1.4.4/util/message_server.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/message_server.h 2010-09-24 10:02:42.000000000 -0700 @@ -22,7 +22,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" namespace mongo { @@ -30,20 +30,23 @@ public: virtual ~MessageHandler(){} virtual void process( Message& m , AbstractMessagingPort* p ) = 0; + virtual void disconnected( AbstractMessagingPort* p ) = 0; }; - + class MessageServer { public: - MessageServer( int port , MessageHandler * handler ) : _port( port ) , _handler( handler ){} - virtual ~MessageServer(){} + struct Options { + int port; // port to bind to + string ipList; // addresses to bind to + Options() : port(0), ipList(""){} + }; + + virtual ~MessageServer(){} virtual void run() = 0; - - protected: - - int _port; - MessageHandler* _handler; + virtual void setAsTimeTracker() = 0; }; - MessageServer * createServer( int port , MessageHandler * handler ); + // TODO use a factory here to decide between port and asio variations + MessageServer * createServer( const MessageServer::Options& opts , MessageHandler * handler ); } diff -Nru mongodb-1.4.4/util/message_server_port.cpp mongodb-1.6.3/util/message_server_port.cpp --- mongodb-1.4.4/util/message_server_port.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/message_server_port.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,32 +15,41 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #ifndef USE_ASIO #include "message.h" #include "message_server.h" +#include "../db/cmdline.h" + namespace mongo { namespace pms { - MessagingPort * grab = 0; MessageHandler * handler; - - void threadRun(){ - assert( grab ); - auto_ptr<MessagingPort> p( grab ); - grab = 0; + + void threadRun( MessagingPort * inPort){ + assert( inPort ); + setThreadName( "conn" ); + TicketHolderReleaser connTicketReleaser( &connTicketHolder ); + + auto_ptr<MessagingPort> p( inPort ); + + string otherSide; + + Message m; try { + otherSide = p->farEnd.toString(); + while ( 1 ){ m.reset(); if ( ! p->recv(m) ) { - log() << "end connection " << p->farEnd.toString() << endl; + if( !cmdLine.quiet ) + log() << "end connection " << otherSide << endl; p->shutdown(); break; } @@ -48,42 +57,69 @@ handler->process( m , p.get() ); } } + catch ( const SocketException& ){ + log() << "unclean socket shutdown from: " << otherSide << endl; + } + catch ( const std::exception& e ){ + problem() << "uncaught exception (" << e.what() << ")(" << demangleName( typeid(e) ) <<") in PortMessageServer::threadRun, closing connection" << endl; + } catch ( ... ){ problem() << "uncaught exception in PortMessageServer::threadRun, closing connection" << endl; } + handler->disconnected( p.get() ); } }
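A condensed sketch of the admission flow that PortMessageServer::accepted() implements below: take a connection ticket, hand the port to its own thread, and on any failure shut the socket down rather than leak it. The bare counter here stands in for the synchronized TicketHolder the real code uses, and the function names are ours:

    #include <boost/bind.hpp>
    #include <boost/thread.hpp>

    namespace sketch {
        int ticketsLeft = 20000;                       // stand-in for TicketHolder; not thread-safe

        void threadRun( mongo::MessagingPort * p );    // per-connection loop, as in pms above

        void onAccepted( mongo::MessagingPort * p ) {
            if ( ticketsLeft <= 0 ) {                  // too many open connections
                p->shutdown();
                delete p;
                return;
            }
            --ticketsLeft;
            try {
                boost::thread thr( boost::bind( &threadRun , p ) );
            }
            catch ( boost::thread_resource_error& ) {  // can't create a new thread
                ++ticketsLeft;
                p->shutdown();
                delete p;
            }
        }
    }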
class PortMessageServer : public MessageServer , public Listener { public: - PortMessageServer( int port , MessageHandler * handler ) : - MessageServer( port , handler ) , - Listener( "", port ){ + PortMessageServer( const MessageServer::Options& opts, MessageHandler * handler ) : + Listener( opts.ipList, opts.port ){ uassert( 10275 , "multiple PortMessageServer not supported" , ! pms::handler ); pms::handler = handler; } virtual void accepted(MessagingPort * p) { - assert( ! pms::grab ); - pms::grab = p; - boost::thread thr( pms::threadRun ); - while ( pms::grab ) - sleepmillis(1); + + if ( ! connTicketHolder.tryAcquire() ){ + log() << "connection refused because too many open connections: " << connTicketHolder.used() << endl; + + // TODO: would be nice if we notified them... + p->shutdown(); + delete p; + + sleepmillis(2); // otherwise we'll hard loop + return; + } + + try { + boost::thread thr( boost::bind( &pms::threadRun , p ) ); + } + catch ( boost::thread_resource_error& ){ + log() << "can't create new thread, closing connection" << endl; + + p->shutdown(); + delete p; + + sleepmillis(2); + } } + virtual void setAsTimeTracker(){ + Listener::setAsTimeTracker(); + } + void run(){ - assert( init() ); - listen(); + initAndListen(); } }; - MessageServer * createServer( int port , MessageHandler * handler ){ - return new PortMessageServer( port , handler ); + MessageServer * createServer( const MessageServer::Options& opts , MessageHandler * handler ){ + return new PortMessageServer( opts , handler ); } } diff -Nru mongodb-1.4.4/util/miniwebserver.cpp mongodb-1.6.3/util/miniwebserver.cpp --- mongodb-1.4.4/util/miniwebserver.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/miniwebserver.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "miniwebserver.h" #include "hex.h" @@ -23,52 +23,22 @@ namespace mongo { - MiniWebServer::MiniWebServer() { - sock = 0; - } - - bool MiniWebServer::init(const string &ip, int _port) { - port = _port; - SockAddr me; - if ( ip.empty() ) - me = SockAddr( port ); - else - me = SockAddr( ip.c_str(), port ); - sock = ::socket(AF_INET, SOCK_STREAM, 0); - if ( sock == INVALID_SOCKET ) { - log() << "ERROR: MiniWebServer listen(): invalid socket? " << OUTPUT_ERRNO << endl; - return false; - } - prebindOptions( sock ); - if ( ::bind(sock, (sockaddr *) &me.sa, me.addressSize) != 0 ) { - log() << "MiniWebServer: bind() failed port:" << port << " " << OUTPUT_ERRNO << endl; - if ( errno == EADDRINUSE ) - log() << " addr already in use" << endl; - closesocket(sock); - return false; - } - - if ( ::listen(sock, 16) != 0 ) { - log() << "MiniWebServer: listen() failed " << OUTPUT_ERRNO << endl; - closesocket(sock); - return false; - } - - return true; - } + MiniWebServer::MiniWebServer(const string &ip, int port) + : Listener(ip, port, false) + {} string MiniWebServer::parseURL( const char * buf ) { - const char * urlStart = strstr( buf , " " ); + const char * urlStart = strchr( buf , ' ' ); if ( ! urlStart ) return "/"; urlStart++; - const char * end = strstr( urlStart , " " ); + const char * end = strchr( urlStart , ' ' ); if ( ! end ) { - end = strstr( urlStart , "\r" ); + end = strchr( urlStart , '\r' ); if ( ! end ) { - end = strstr( urlStart , "\n" ); + end = strchr( urlStart , '\n' ); } } @@ -105,14 +75,14 @@ if ( eq == string::npos ) continue; - b.append( urlDecode(cur.substr(0,eq)).c_str() , urlDecode(cur.substr(eq+1) ) ); + b.append( urlDecode(cur.substr(0,eq)) , urlDecode(cur.substr(eq+1) ) ); } params = b.obj(); } string MiniWebServer::parseMethod( const char * headers ) { - const char * end = strstr( headers , " " ); + const char * end = strchr( headers , ' ' ); if ( !
end ) return "GET"; return string( headers , (int)(end-headers) ); @@ -139,17 +109,23 @@ } void MiniWebServer::accepted(int s, const SockAddr &from) { + setSockTimeouts(s, 8); char buf[4096]; int len = 0; while ( 1 ) { - int x = ::recv(s, buf + len, sizeof(buf) - 1 - len, 0); + int left = sizeof(buf) - 1 - len; + if( left == 0 ) + break; + int x = ::recv(s, buf + len, left, 0); if ( x <= 0 ) { + closesocket(s); return; } len += x; buf[ len ] = 0; - if ( fullReceive( buf ) ) + if ( fullReceive( buf ) ) { break; + } } buf[len] = 0; @@ -178,18 +154,23 @@ ss << "Content-Type: text/html\r\n"; } else { - for ( vector::iterator i = headers.begin(); i != headers.end(); i++ ) + for ( vector::iterator i = headers.begin(); i != headers.end(); i++ ) { + assert( strncmp("Content-Length", i->c_str(), 14) ); ss << *i << "\r\n"; + } } + ss << "Connection: close\r\n"; + ss << "Content-Length: " << responseMsg.size() << "\r\n"; ss << "\r\n"; ss << responseMsg; string response = ss.str(); ::send(s, response.c_str(), response.size(), 0); + closesocket(s); } string MiniWebServer::getHeader( const char * req , string wanted ){ - const char * headers = strstr( req , "\n" ); + const char * headers = strchr( req , '\n' ); if ( ! headers ) return ""; pcrecpp::StringPiece input( headers + 1 ); @@ -203,26 +184,6 @@ } return ""; } - - void MiniWebServer::run() { - SockAddr from; - while ( ! inShutdown() ) { - int s = accept(sock, (sockaddr *) &from.sa, &from.addressSize); - if ( s < 0 ) { - if ( errno == ECONNABORTED ) { - log() << "Listener on port " << port << " aborted." << endl; - return; - } - log() << "MiniWebServer: accept() returns " << s << " " << OUTPUT_ERRNO << endl; - sleepmillis(200); - continue; - } - disableNagle(s); - RARELY log() << "MiniWebServer: connection accepted from " << from.toString() << endl; - accepted( s, from ); - closesocket(s); - } - } string MiniWebServer::urlDecode(const char* s){ stringstream out; diff -Nru mongodb-1.4.4/util/miniwebserver.h mongodb-1.6.3/util/miniwebserver.h --- mongodb-1.4.4/util/miniwebserver.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/miniwebserver.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,20 +17,17 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "message.h" #include "../db/jsobj.h" namespace mongo { - class MiniWebServer { + class MiniWebServer : public Listener { public: - MiniWebServer(); + MiniWebServer(const string &ip, int _port); virtual ~MiniWebServer() {} - bool init(const string &ip, int _port); - void run(); - virtual void doRequest( const char *rq, // the full request string url, @@ -41,13 +38,13 @@ const SockAddr &from ) = 0; - int socket() const { return sock; } + // --- static helpers ---- + + static void parseParams( BSONObj & params , string query ); - protected: - string parseURL( const char * buf ); - string parseMethod( const char * headers ); - string getHeader( const char * headers , string name ); - void parseParams( BSONObj & params , string query ); + static string parseURL( const char * buf ); + static string parseMethod( const char * headers ); + static string getHeader( const char * headers , string name ); static const char *body( const char *buf ); static string urlDecode(const char* s); @@ -56,9 +53,6 @@ private: void accepted(int s, const SockAddr &from); static bool fullReceive( const char *buf ); - - int port; - int sock; }; } // namespace mongo diff -Nru mongodb-1.4.4/util/mmap.cpp mongodb-1.6.3/util/mmap.cpp --- mongodb-1.4.4/util/mmap.cpp 2010-06-30 00:03:29.000000000 -0700 +++ 
mongodb-1.6.3/util/mmap.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -15,36 +15,67 @@
 * limitations under the License.
 */
-#include "stdafx.h"
+#include "pch.h"
 #include "mmap.h"
 #include "processinfo.h"
+#include "concurrency/rwlock.h"
 namespace mongo {
-    set<MemoryMappedFile*> mmfiles;
-    mongo::mutex mmmutex;
+    /*static*/ void MemoryMappedFile::updateLength( const char *filename, long &length ) {
+        if ( !boost::filesystem::exists( filename ) )
+            return;
+        // make sure we map full length if preexisting file.
+        boost::uintmax_t l = boost::filesystem::file_size( filename );
+        assert( l <= 0x7fffffff );
+        length = (long) l;
+    }
-    MemoryMappedFile::~MemoryMappedFile() {
-        close();
-        scoped_lock lk( mmmutex );
-        mmfiles.erase(this);
+    void* MemoryMappedFile::map(const char *filename) {
+        boost::uintmax_t l = boost::filesystem::file_size( filename );
+        assert( l <= 0x7fffffff );
+        long i = (long)l;
+        return map( filename , i );
     }
-    void MemoryMappedFile::created(){
-        scoped_lock lk( mmmutex );
-        mmfiles.insert(this);
+    void printMemInfo( const char * where ){
+        cout << "mem info: ";
+        if ( where )
+            cout << where << " ";
+        ProcessInfo pi;
+        if ( ! pi.supported() ){
+            cout << " not supported" << endl;
+            return;
+        }
+
+        cout << "vsize: " << pi.getVirtualMemorySize() << " resident: " << pi.getResidentSize() << " mapped: " << ( MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ) ) << endl;
+    }
+
+    /* --- MongoFile -------------------------------------------------
+       this is the administrative stuff
+    */
+
+    static set<MongoFile*> mmfiles;
+    static RWLock mmmutex("rw:mmmutex");
+
+    void MongoFile::destroyed() {
+        rwlock lk( mmmutex , true );
+        mmfiles.erase(this);
     }
     /*static*/
-    int closingAllFiles = 0;
-    void MemoryMappedFile::closeAllFiles( stringstream &message ) {
+    void MongoFile::closeAllFiles( stringstream &message ) {
+        static int closingAllFiles = 0;
         if ( closingAllFiles ) {
             message << "warning closingAllFiles=" << closingAllFiles << endl;
             return;
         }
         ++closingAllFiles;
+
+        rwlock lk( mmmutex , true );
+
         ProgressMeter pm( mmfiles.size() , 2 , 1 );
-        for ( set<MemoryMappedFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
+        for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
             (*i)->close();
             pm.hit();
         }
@@ -52,59 +83,78 @@
         --closingAllFiles;
     }
-    long long MemoryMappedFile::totalMappedLength(){
+    /*static*/ long long MongoFile::totalMappedLength(){
         unsigned long long total = 0;
-        scoped_lock lk( mmmutex );
-        for ( set<MemoryMappedFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ )
+        rwlock lk( mmmutex , false );
+        for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ )
             total += (*i)->length();
         return total;
     }
-    int MemoryMappedFile::flushAll( bool sync ){
-        int num = 0;
-
-        scoped_lock lk( mmmutex );
-        for ( set<MemoryMappedFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
-            num++;
-            MemoryMappedFile * mmf = *i;
-            if ( ! mmf )
-                continue;
-            mmf->flush( sync );
+    /*static*/ int MongoFile::flushAll( bool sync ){
+        if ( ! sync ){
+            int num = 0;
+            rwlock lk( mmmutex , false );
+            for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
+                num++;
+                MongoFile * mmf = *i;
+                if ( ! mmf )
+                    continue;
+
+                mmf->flush( sync );
+            }
+            return num;
+        }
+
+        // want to do it sync
+        set<MongoFile*> seen;
+        while ( true ){
+            auto_ptr<Flushable> f;
+            {
+                rwlock lk( mmmutex , false );
+                for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
+                    MongoFile * mmf = *i;
+                    if ( ! mmf )
+                        continue;
+                    if ( seen.count( mmf ) )
+                        continue;
+                    f.reset( mmf->prepareFlush() );
+                    seen.insert( mmf );
+                    break;
+                }
+            }
+            if ( ! f.get() )
+                break;
+
+            f->flush();
        }
-        return num;
+        return seen.size();
    }
-
-    void MemoryMappedFile::updateLength( const char *filename, long &length ) {
-        if ( !boost::filesystem::exists( filename ) )
-            return;
-        // make sure we map full length if preexisting file.
-        boost::uintmax_t l = boost::filesystem::file_size( filename );
-        assert( l <= 0x7fffffff );
-        length = (long) l;
+    void MongoFile::created(){
+        rwlock lk( mmmutex , true );
+        mmfiles.insert(this);
    }
-    void* MemoryMappedFile::map(const char *filename) {
-        boost::uintmax_t l = boost::filesystem::file_size( filename );
-        assert( l <= 0x7fffffff );
-        long i = (long)l;
-        return map( filename , i );
-    }
+#ifdef _DEBUG
-    void printMemInfo( const char * where ){
-        cout << "mem info: ";
-        if ( where )
-            cout << where << " ";
-        ProcessInfo pi;
-        if ( ! pi.supported() ){
-            cout << " not supported" << endl;
-            return;
+    void MongoFile::lockAll() {
+        rwlock lk( mmmutex , false );
+        for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
+            MongoFile * mmf = *i;
+            if (mmf) mmf->_lock();
        }
-
-        cout << "vsize: " << pi.getVirtualMemorySize() << " resident: " << pi.getResidentSize() << " mapped: " << ( MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ) ) << endl;
    }
+    void MongoFile::unlockAll() {
+        rwlock lk( mmmutex , false );
+        for ( set<MongoFile*>::iterator i = mmfiles.begin(); i != mmfiles.end(); i++ ){
+            MongoFile * mmf = *i;
+            if (mmf) mmf->_unlock();
+        }
+    }
+#endif
} // namespace mongo
diff -Nru mongodb-1.4.4/util/mmap.h mongodb-1.6.3/util/mmap.h
--- mongodb-1.4.4/util/mmap.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/util/mmap.h	2010-09-24 10:02:42.000000000 -0700
@@ -18,52 +18,184 @@
 #pragma once
 namespace mongo {
+
+    /* the administrative-ish stuff here */
+    class MongoFile : boost::noncopyable {
+
+    public:
+        /** Flushable has to fail nicely if the underlying object gets killed */
+        class Flushable {
+        public:
+            virtual ~Flushable(){}
+            virtual void flush() = 0;
+        };
+
+    protected:
+        virtual void close() = 0;
+        virtual void flush(bool sync) = 0;
+        /**
+         * returns a thread safe object that you can call flush on
+         * Flushable has to fail nicely if the underlying object gets killed
+         */
+        virtual Flushable * prepareFlush() = 0;
+
+        void created(); /* subclass must call after create */
+        void destroyed(); /* subclass must call in destructor */
+
+        // only supporting on posix mmap
+        virtual void _lock() {}
+        virtual void _unlock() {}
-    class MemoryMappedFile {
     public:
+        virtual ~MongoFile() {}
+        virtual long length() = 0;
         enum Options {
-            SEQUENTIAL = 1
+            SEQUENTIAL = 1 // hint - e.g. FILE_FLAG_SEQUENTIAL_SCAN on windows
+        };
+
+        static int flushAll( bool sync ); // returns n flushed
+        static long long totalMappedLength();
+        static void closeAllFiles( stringstream &message );
+
+        // Locking allows writes. Reads are always allowed
+        static void lockAll();
+        static void unlockAll();
+
+        /* can be "overridden" if necessary */
+        static bool exists(boost::filesystem::path p) {
+            return boost::filesystem::exists(p);
+        }
+    };
+
+#ifndef _DEBUG
+    // no-ops in production
+    inline void MongoFile::lockAll() {}
+    inline void MongoFile::unlockAll() {}
+
+#endif
+
+    struct MongoFileAllowWrites {
+        MongoFileAllowWrites(){
+            MongoFile::lockAll();
+        }
+        ~MongoFileAllowWrites(){
+            MongoFile::unlockAll();
+        }
+    };
+
+    /** template for what a new storage engine's class definition must implement
+        PRELIMINARY - subject to change.
+ */ + class StorageContainerTemplate : public MongoFile { + protected: + virtual void close(); + virtual void flush(bool sync); + public: + virtual long length(); + + /** pointer to a range of space in this storage unit */ + class Pointer { + public: + /** retried address of buffer at offset 'offset' withing the storage unit. returned range is a contiguous + buffer reflecting what is in storage. caller will not read or write past 'len'. + + note calls may be received that are at different points in a range and different lengths. however + for now assume that on writes, if a call is made, previously returned addresses are no longer valid. i.e. + p = at(10000, 500); + q = at(10000, 600); + after the second call it is ok if p is invalid. + */ + void* at(int offset, int len); + + /** indicate that we wrote to the range (from a previous at() call) and that it needs + flushing to disk. + */ + void written(int offset, int len); + + bool isNull() const; + }; + + /** commit written() calls from above. */ + void commit(); + + Pointer open(const char *filename); + Pointer open(const char *_filename, long &length, int options=0); + }; + + class MemoryMappedFile : public MongoFile { + public: + class Pointer { + char *_base; + public: + Pointer() : _base(0) { } + Pointer(void *p) : _base((char*) p) { } + void* at(int offset, int maxLen) { return _base + offset; } + void grow(int offset, int len) { /* no action required with mem mapped file */ } + bool isNull() const { return _base == 0; } }; MemoryMappedFile(); - ~MemoryMappedFile(); /* closes the file if open */ + ~MemoryMappedFile() { + destroyed(); + close(); + } void close(); - // Throws exception if file doesn't exist. + // Throws exception if file doesn't exist. (dm may2010: not sure if this is always true?) void* map( const char *filename ); + /*To replace map(): + + Pointer open( const char *filename ) { + void *p = map(filename); + uassert(13077, "couldn't open/map file", p); + return Pointer(p); + }*/ + /* Creates with length if DNE, otherwise uses existing file length, passed length. */ void* map(const char *filename, long &length, int options = 0 ); void flush(bool sync); + virtual Flushable * prepareFlush(); - void* viewOfs() { + /*void* viewOfs() { return view; - } + }*/ long length() { return len; } - - static void updateLength( const char *filename, long &length ); - - static long long totalMappedLength(); - static void closeAllFiles( stringstream &message ); - static int flushAll( bool sync ); + + string filename() const { return _filename; } private: - void created(); + static void updateLength( const char *filename, long &length ); HANDLE fd; HANDLE maphandle; void *view; long len; string _filename; + + protected: + // only posix mmap implementations will support this + virtual void _lock(); + virtual void _unlock(); + }; void printMemInfo( const char * where ); +#include "ramstore.h" + +//#define _RAMSTORE +#if defined(_RAMSTORE) + typedef RamStoreFile MMF; +#else + typedef MemoryMappedFile MMF; +#endif + } // namespace mongo diff -Nru mongodb-1.4.4/util/mmap_mm.cpp mongodb-1.6.3/util/mmap_mm.cpp --- mongodb-1.4.4/util/mmap_mm.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/mmap_mm.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// mmap_mm.cpp +// mmap_mm.cpp - in memory (no file) version /* Copyright 2009 10gen Inc. * @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -#include "stdafx.h" +#include "pch.h" #include "mmap.h" /* in memory (no file) version */ @@ -45,6 +45,8 @@ void MemoryMappedFile::flush(bool sync) { } + void MemoryMappedFile::_lock() {} + void MemoryMappedFile::_unlock() {} } diff -Nru mongodb-1.4.4/util/mmap_posix.cpp mongodb-1.6.3/util/mmap_posix.cpp --- mongodb-1.4.4/util/mmap_posix.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/mmap_posix.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,9 +15,10 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "mmap.h" #include "file_allocator.h" +#include "../db/concurrency.h" #include #include @@ -60,7 +61,7 @@ fd = open(filename, O_RDWR | O_NOATIME); if ( fd <= 0 ) { - out() << "couldn't open " << filename << ' ' << OUTPUT_ERRNO << endl; + out() << "couldn't open " << filename << ' ' << errnoWithDescription() << endl; return 0; } @@ -74,7 +75,7 @@ view = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if ( view == MAP_FAILED ) { - out() << " mmap() failed for " << filename << " len:" << length << " " << OUTPUT_ERRNO << endl; + out() << " mmap() failed for " << filename << " len:" << length << " " << errnoWithDescription() << endl; if ( errno == ENOMEM ){ out() << " mmap failed with out of memory, if you're using 32-bits, then you probably need to upgrade to 64" << endl; } @@ -86,10 +87,15 @@ #else if ( options & SEQUENTIAL ){ if ( madvise( view , length , MADV_SEQUENTIAL ) ){ - out() << " madvise failed for " << filename << " " << OUTPUT_ERRNO << endl; + out() << " madvise failed for " << filename << " " << errnoWithDescription() << endl; } } #endif + + DEV if (! dbMutex.info().isLocked()){ + _unlock(); + } + return view; } @@ -97,9 +103,38 @@ if ( view == 0 || fd == 0 ) return; if ( msync(view, len, sync ? MS_SYNC : MS_ASYNC) ) - problem() << "msync " << OUTPUT_ERRNO << endl; + problem() << "msync " << errnoWithDescription() << endl; } + class PosixFlushable : public MemoryMappedFile::Flushable { + public: + PosixFlushable( void * view , HANDLE fd , long len ) + : _view( view ) , _fd( fd ) , _len(len){ + } + + void flush(){ + if ( _view && _fd ) + if ( msync(_view, _len, MS_SYNC ) ) + problem() << "msync " << errnoWithDescription() << endl; + + } + + void * _view; + HANDLE _fd; + long _len; + }; + + MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush(){ + return new PosixFlushable( view , fd , len ); + } + + void MemoryMappedFile::_lock() { + if (view) assert(mprotect(view, len, PROT_READ | PROT_WRITE) == 0); + } + + void MemoryMappedFile::_unlock() { + if (view) assert(mprotect(view, len, PROT_READ) == 0); + } } // namespace mongo diff -Nru mongodb-1.4.4/util/mmap_win.cpp mongodb-1.6.3/util/mmap_win.cpp --- mongodb-1.4.4/util/mmap_win.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/mmap_win.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,8 +15,9 @@ * limitations under the License. 
*/ -#include "stdafx.h" +#include "pch.h" #include "mmap.h" +#include "text.h" #include namespace mongo { @@ -40,12 +41,6 @@ CloseHandle(fd); fd = 0; } - - std::wstring toWideString(const char *s) { - std::basic_ostringstream buf; - buf << s; - return buf.str(); - } unsigned mapped = 0; @@ -68,14 +63,14 @@ } updateLength( filename, length ); - std::wstring filenamew = toWideString(filename); DWORD createOptions = FILE_ATTRIBUTE_NORMAL; if ( options & SEQUENTIAL ) createOptions |= FILE_FLAG_SEQUENTIAL_SCAN; fd = CreateFile( - filenamew.c_str(), GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, + toNativeString(filename).c_str(), + GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, createOptions , NULL); if ( fd == INVALID_HANDLE_VALUE ) { out() << "Create/OpenFile failed " << filename << ' ' << GetLastError() << endl; @@ -92,7 +87,7 @@ view = MapViewOfFile(maphandle, FILE_MAP_ALL_ACCESS, 0, 0, 0); if ( view == 0 ) { - out() << "MapViewOfFile failed " << filename << " " << OUTPUT_ERRNO << " "; + out() << "MapViewOfFile failed " << filename << " " << errnoWithDescription() << " "; out() << GetLastError(); out() << endl; } @@ -100,21 +95,47 @@ return view; } + class WindowsFlushable : public MemoryMappedFile::Flushable { + public: + WindowsFlushable( void * view , HANDLE fd , string filename ) + : _view(view) , _fd(fd) , _filename(filename){ + + } + + void flush(){ + if (!_view || !_fd) + return; + + bool success = FlushViewOfFile(_view, 0); // 0 means whole mapping + if (!success){ + int err = GetLastError(); + out() << "FlushViewOfFile failed " << err << " file: " << _filename << endl; + } + + success = FlushFileBuffers(_fd); + if (!success){ + int err = GetLastError(); + out() << "FlushFileBuffers failed " << err << " file: " << _filename << endl; + } + } + + void * _view; + HANDLE _fd; + string _filename; + + }; + void MemoryMappedFile::flush(bool sync) { uassert(13056, "Async flushing not supported on windows", sync); + + WindowsFlushable f( view , fd , _filename ); + f.flush(); + } - if (!view || !fd) return; - - bool success = FlushViewOfFile(view, 0); // 0 means whole mapping - if (!success){ - int err = GetLastError(); - out() << "FlushViewOfFile failed " << err << " file: " << _filename << endl; - } - - success = FlushFileBuffers(fd); - if (!success){ - int err = GetLastError(); - out() << "FlushFileBuffers failed " << err << " file: " << _filename << endl; - } + MemoryMappedFile::Flushable * MemoryMappedFile::prepareFlush(){ + return new WindowsFlushable( view , fd , _filename ); } + void MemoryMappedFile::_lock() {} + void MemoryMappedFile::_unlock() {} + } diff -Nru mongodb-1.4.4/util/mongoutils/checksum.h mongodb-1.6.3/util/mongoutils/checksum.h --- mongodb-1.4.4/util/mongoutils/checksum.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/mongoutils/checksum.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,32 @@ +/** @checksum.h */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace mongoutils { + + /** + * this is a silly temporary implementation + */ + inline int checksum( const char* x , int size ){ + int ck = 0; + for ( int i=0; i + +namespace mongoutils { + + namespace html { + + using namespace std; + + inline string _end() { return ""; } + inline string _table() { return "\n\n"; } + inline string _tr() { return "\n"; } + + inline string tr() { return ""; } + inline string tr(string a, string b) { + stringstream ss; + ss << "" << a << "" << b << "\n"; + return ss.str(); + } + template + inline string td(T x) { + stringstream ss; + ss << "" << x << ""; + return ss.str(); + } + inline string td(string x) { + return "" + x + ""; + } + inline string th(string x) { + return "" + x + ""; + } + + inline void tablecell( stringstream& ss , bool b ){ + ss << "" << (b ? "X" : "") << ""; + } + + template< typename T> + inline void tablecell( stringstream& ss , const T& t ){ + ss << "" << t << ""; + } + + inline string table(const char *headers[] = 0, bool border = true) { + stringstream ss; + ss << "\n\n"; + if( headers ) { + ss << ""; + while( *headers ) { + ss << ""; + headers++; + } + ss << "\n"; + } + return ss.str(); + } + + inline string start(string title) { + stringstream ss; + ss << "\n"; + ss << title; + ss << "\n"; + + ss << "\n"; + + ss << "\n\n"; + return ss.str(); + } + + inline string red(string contentHtml, bool color=true) { + if( !color ) return contentHtml; + stringstream ss; + ss << "" << contentHtml << ""; + return ss.str(); + } + inline string grey(string contentHtml, bool color=true) { + if( !color ) return contentHtml; + stringstream ss; + ss << "" << contentHtml << ""; + return ss.str(); + } + inline string blue(string contentHtml, bool color=true) { + if( !color ) return contentHtml; + stringstream ss; + ss << "" << contentHtml << ""; + return ss.str(); + } + inline string yellow(string contentHtml, bool color=true) { + if( !color ) return contentHtml; + stringstream ss; + ss << "" << contentHtml << ""; + return ss.str(); + } + inline string green(string contentHtml, bool color=true) { + if( !color ) return contentHtml; + stringstream ss; + ss << "" << contentHtml << ""; + return ss.str(); + } + + inline string p(string contentHtml) { + stringstream ss; + ss << "

    " << contentHtml << "

    \n"; + return ss.str(); + } + + inline string h2(string contentHtml) { + stringstream ss; + ss << "

    " << contentHtml << "

    \n"; + return ss.str(); + } + + /* does NOT escape the strings. */ + inline string a(string href, string title="", string contentHtml = "") { + stringstream ss; + ss << "'; + if( !contentHtml.empty() ) { + ss << contentHtml << ""; + } + return ss.str(); + } + + } + +} diff -Nru mongodb-1.4.4/util/mongoutils/mongoutils.vcxproj mongodb-1.6.3/util/mongoutils/mongoutils.vcxproj --- mongodb-1.4.4/util/mongoutils/mongoutils.vcxproj 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/mongoutils/mongoutils.vcxproj 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,73 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {7B84584E-92BC-4DB9-971B-A1A8F93E5053} + mongoutils + mongoutils test program + + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + Level3 + Disabled + + + true + + + + + Level3 + MaxSpeed + true + true + + + true + true + true + + + + + + + + + + + + + \ No newline at end of file diff -Nru mongodb-1.4.4/util/mongoutils/mongoutils.vcxproj.filters mongodb-1.6.3/util/mongoutils/mongoutils.vcxproj.filters --- mongodb-1.4.4/util/mongoutils/mongoutils.vcxproj.filters 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/mongoutils/mongoutils.vcxproj.filters 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff -Nru mongodb-1.4.4/util/mongoutils/README mongodb-1.6.3/util/mongoutils/README --- mongodb-1.4.4/util/mongoutils/README 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/mongoutils/README 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,7 @@ + mongoutils namespace requirements: + + (1) code is not database specific, rather, true utilities + (2) are cross platform + (3) may require boost headers, but not libs + (4) are clean and easy to use in any c++ project without pulling in lots of other stuff + (5) apache license diff -Nru mongodb-1.4.4/util/mongoutils/str.h mongodb-1.6.3/util/mongoutils/str.h --- mongodb-1.4.4/util/mongoutils/str.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/mongoutils/str.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,118 @@ +// @file str.h + +/* Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/* Things in the mongoutils namespace + (1) are not database specific, rather, true utilities + (2) are cross platform + (3) may require boost headers, but not libs + (4) are clean and easy to use in any c++ project without pulling in lots of other stuff + + Note: within this module, we use int for all offsets -- there are no unsigned offsets + and no size_t's. If you need 3 gigabyte long strings, don't use this module. +*/ + +#include +#include + +namespace mongoutils { + + namespace str { + + using namespace std; + + /** the idea here is to make one liners easy. 
e.g.:
+
+               return str::stream() << 1 << ' ' << 2;
+
+            since the following doesn't work:
+
+               (stringstream() << 1).str();
+        */
+        class stream {
+        public:
+            stringstream ss;
+
+            template<class T>
+            stream& operator<<(const T& v) {
+                ss << v;
+                return *this;
+            }
+
+            operator std::string () const { return ss.str(); }
+        };
+
+        inline bool startsWith(const char *str, const char *prefix) {
+            const char *s = str;
+            const char *p = prefix;
+            while( *p ) {
+                if( *p != *s ) return false;
+                p++; s++;
+            }
+            return true;
+        }
+        inline bool startsWith(string s, string p) { return startsWith(s.c_str(), p.c_str()); }
+
+        inline bool endsWith(string s, string p) {
+            int l = p.size();
+            int x = s.size();
+            if( x < l ) return false;
+            return strncmp(s.c_str()+x-l, p.c_str(), l) == 0;
+        }
+
+        /** find char x, and return rest of string thereafter, or "" if not found */
+        inline const char * after(const char *s, char x) {
+            const char *p = strchr(s, x);
+            return (p != 0) ? p+1 : ""; }
+        inline string after(const string& s, char x) {
+            const char *p = strchr(s.c_str(), x);
+            return (p != 0) ? string(p+1) : ""; }
+
+        inline const char * after(const char *s, const char *x) {
+            const char *p = strstr(s, x);
+            return (p != 0) ? p+strlen(x) : ""; }
+        inline string after(string s, string x) {
+            const char *p = strstr(s.c_str(), x.c_str());
+            return (p != 0) ? string(p+x.size()) : ""; }
+
+        inline bool contains(string s, string x) {
+            return strstr(s.c_str(), x.c_str()) != 0; }
+
+        /** @return everything before the character x, else entire string */
+        inline string before(const string& s, char x) {
+            const char *p = strchr(s.c_str(), x);
+            return (p != 0) ? s.substr(0, p-s.c_str()) : s; }
+
+        /** check if strings share a common starting prefix
+            @return offset of divergence (or length if equal).  0=nothing in common. */
+        inline int shareCommonPrefix(const char *p, const char *q) {
+            int ofs = 0;
+            while( 1 ) {
+                if( *p == 0 || *q == 0 )
+                    break;
+                if( *p != *q )
+                    break;
+                p++; q++; ofs++;
+            }
+            return ofs; }
+        inline int shareCommonPrefix(const string &a, const string &b)
+        { return shareCommonPrefix(a.c_str(), b.c_str()); }
+
+    }
+
+}
diff -Nru mongodb-1.4.4/util/mongoutils/test.cpp mongodb-1.6.3/util/mongoutils/test.cpp
--- mongodb-1.4.4/util/mongoutils/test.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/mongoutils/test.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,34 @@
+/* @file test.cpp
+   utils/mongoutils/test.cpp
+   unit tests for mongoutils
+*/
+
+/*
+ * Copyright 2010 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "str.h" +#include "html.h" +#include + +using namespace std; +using namespace mongoutils; + +int main() { + string x = str::after("abcde", 'c'); + assert( x == "de" ); + assert( str::after("abcde", 'x') == "" ); + return 0; +} diff -Nru mongodb-1.4.4/util/mvar.h mongodb-1.6.3/util/mvar.h --- mongodb-1.4.4/util/mvar.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/mvar.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,116 +0,0 @@ -// mvar.h - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -namespace mongo { - - /* This is based on haskell's MVar synchronization primitive: - * http://www.haskell.org/ghc/docs/latest/html/libraries/base-4.2.0.0/Control-Concurrent-MVar.html - * - * It is a thread-safe queue that can hold at most one object. - * You can also think of it as a box that can be either full or empty. - */ - - template - class MVar { - public: - enum State {EMPTY=0, FULL}; - - // create an empty MVar - MVar() - : _state(EMPTY) - {} - - // creates a full MVar - MVar(const T& val) - : _state(FULL) - , _value(val) - {} - - // puts val into the MVar and returns true or returns false if full - // never blocks - bool tryPut(const T& val){ - // intentionally repeat test before and after lock - if (_state == FULL) return false; - Mutex::scoped_lock lock(_mutex); - if (_state == FULL) return false; - - _state = FULL; - _value = val; - - // unblock threads waiting to 'take' - _condition.notify_all(); - - return true; - } - - // puts val into the MVar - // will block if the MVar is already full - void put(const T& val){ - Mutex::scoped_lock lock(_mutex); - while (!tryPut(val)){ - // unlocks lock while waiting and relocks before returning - _condition.wait(lock); - } - } - - // takes val out of the MVar and returns true or returns false if empty - // never blocks - bool tryTake(T& out){ - // intentionally repeat test before and after lock - if (_state == EMPTY) return false; - Mutex::scoped_lock lock(_mutex); - if (_state == EMPTY) return false; - - _state = EMPTY; - out = _value; - - // unblock threads waiting to 'put' - _condition.notify_all(); - - return true; - } - - // takes val out of the MVar - // will block if the MVar is empty - T take(){ - T ret = T(); - - Mutex::scoped_lock lock(_mutex); - while (!tryTake(ret)){ - // unlocks lock while waiting and relocks before returning - _condition.wait(lock); - } - - return ret; - } - - - // Note: this is fast because there is no locking, but state could - // change before you get a chance to act on it. - // Mainly useful for sanity checks / asserts. 
- State getState(){ return _state; } - - - private: - State _state; - T _value; - typedef boost::recursive_mutex Mutex; - Mutex _mutex; - boost::condition _condition; - }; - -} diff -Nru mongodb-1.4.4/util/ntservice.cpp mongodb-1.6.3/util/ntservice.cpp --- mongodb-1.4.4/util/ntservice.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/ntservice.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,8 +15,10 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "ntservice.h" +#include "winutil.h" +#include "text.h" #include #if defined(_WIN32) @@ -25,14 +27,14 @@ void shutdown(); - SERVICE_STATUS_HANDLE ServiceController::_statusHandle = null; + SERVICE_STATUS_HANDLE ServiceController::_statusHandle = NULL; std::wstring ServiceController::_serviceName; - ServiceCallback ServiceController::_serviceCallback = null; + ServiceCallback ServiceController::_serviceCallback = NULL; ServiceController::ServiceController() { } - bool ServiceController::installService( const std::wstring& serviceName, const std::wstring& displayName, const std::wstring& serviceDesc, int argc, char* argv[] ) { + bool ServiceController::installService( const std::wstring& serviceName, const std::wstring& displayName, const std::wstring& serviceDesc, const std::wstring& serviceUser, const std::wstring& servicePassword, const std::string dbpath, int argc, char* argv[] ) { assert(argc >= 1); stringstream commandLine; @@ -46,38 +48,81 @@ } for ( int i = 1; i < argc; i++ ) { - std::string arg( argv[ i ] ); - - // replace install command to indicate process is being started as a service - if ( arg == "--install" ) - arg = "--service"; - - commandLine << arg << " "; - } - - SC_HANDLE schSCManager = ::OpenSCManager( null, null, SC_MANAGER_ALL_ACCESS ); - if ( schSCManager == null ) + std::string arg( argv[ i ] ); + // replace install command to indicate process is being started as a service + if ( arg == "--install" || arg == "--reinstall" ) { + arg = "--service"; + } else if ( arg == "--dbpath" && i + 1 < argc ) { + commandLine << arg << " \"" << dbpath << "\" "; + i++; + continue; + } else if ( arg.length() > 9 && arg.substr(0, 9) == "--service" ) { + // Strip off --service(Name|User|Password) arguments + continue; + } + commandLine << arg << " "; + } + + SC_HANDLE schSCManager = ::OpenSCManager( NULL, NULL, SC_MANAGER_ALL_ACCESS ); + if ( schSCManager == NULL ) { + DWORD err = ::GetLastError(); + cerr << "Error connecting to the Service Control Manager: " << GetWinErrMsg(err) << endl; + return false; + } + + // Make sure servise doesn't already exist. + // TODO: Check to see if service is in "Deleting" status, suggest the user close down Services MMC snap-ins. + SC_HANDLE schService = ::OpenService( schSCManager, serviceName.c_str(), SERVICE_ALL_ACCESS ); + if ( schService != NULL ) { + cerr << "There is already a service named " << toUtf8String(serviceName) << ". Aborting" << endl; + ::CloseServiceHandle( schService ); + ::CloseServiceHandle( schSCManager ); return false; - + } std::basic_ostringstream< TCHAR > commandLineWide; - commandLineWide << commandLine.str().c_str(); + commandLineWide << commandLine.str().c_str(); + + cerr << "Creating service " << toUtf8String(serviceName) << "." 
<< endl; // create new service - SC_HANDLE schService = ::CreateService( schSCManager, serviceName.c_str(), displayName.c_str(), + schService = ::CreateService( schSCManager, serviceName.c_str(), displayName.c_str(), SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, - commandLineWide.str().c_str(), null, null, L"\0\0", null, null ); + commandLineWide.str().c_str(), NULL, NULL, L"\0\0", NULL, NULL ); + if ( schService == NULL ) { + DWORD err = ::GetLastError(); + cerr << "Error creating service: " << GetWinErrMsg(err) << endl; + ::CloseServiceHandle( schSCManager ); + return false; + } - if ( schService == null ) { - ::CloseServiceHandle( schSCManager ); - return false; - } + cerr << "Service creation successful." << endl; + cerr << "Service can be started from the command line via 'net start \"" << toUtf8String(serviceName) << "\"'." << endl; + + bool serviceInstalled; + + // TODO: If neccessary grant user "Login as a Service" permission. + if ( !serviceUser.empty() ) { + std::wstring actualServiceUser; + if ( serviceUser.find(L"\\") == string::npos ) { + actualServiceUser = L".\\" + serviceUser; + } + else { + actualServiceUser = serviceUser; + } + cerr << "Setting service login credentials. User: " << toUtf8String(actualServiceUser) << endl; + serviceInstalled = ::ChangeServiceConfig( schService, SERVICE_NO_CHANGE, SERVICE_NO_CHANGE, SERVICE_NO_CHANGE, NULL, NULL, NULL, NULL, actualServiceUser.c_str(), servicePassword.c_str(), NULL ); + if ( !serviceInstalled ) { + cerr << "Setting service login failed. Service has 'LocalService' permissions." << endl; + } + } + + // set the service description SERVICE_DESCRIPTION serviceDescription; serviceDescription.lpDescription = (LPTSTR)serviceDesc.c_str(); - - // set new service description - bool serviceInstalled = ::ChangeServiceConfig2( schService, SERVICE_CONFIG_DESCRIPTION, &serviceDescription ); + serviceInstalled = ::ChangeServiceConfig2( schService, SERVICE_CONFIG_DESCRIPTION, &serviceDescription ); + if ( serviceInstalled ) { SC_ACTION aActions[ 3 ] = { { SC_ACTION_RESTART, 0 }, { SC_ACTION_RESTART, 0 }, { SC_ACTION_RESTART, 0 } }; @@ -89,8 +134,12 @@ // set service recovery options serviceInstalled = ::ChangeServiceConfig2( schService, SERVICE_CONFIG_FAILURE_ACTIONS, &serviceFailure ); + } - + else { + cerr << "Could not set service description. Check the event log for more details." << endl; + } + ::CloseServiceHandle( schService ); ::CloseServiceHandle( schSCManager ); @@ -98,13 +147,16 @@ } bool ServiceController::removeService( const std::wstring& serviceName ) { - SC_HANDLE schSCManager = ::OpenSCManager( null, null, SC_MANAGER_ALL_ACCESS ); - if ( schSCManager == null ) - return false; + SC_HANDLE schSCManager = ::OpenSCManager( NULL, NULL, SC_MANAGER_ALL_ACCESS ); + if ( schSCManager == NULL ) { + DWORD err = ::GetLastError(); + cerr << "Error connecting to the Service Control Manager: " << GetWinErrMsg(err) << endl; + return false; + } SC_HANDLE schService = ::OpenService( schSCManager, serviceName.c_str(), SERVICE_ALL_ACCESS ); - - if ( schService == null ) { + if ( schService == NULL ) { + cerr << "Could not find a service named " << toUtf8String(serviceName) << " to uninstall." << endl; ::CloseServiceHandle( schSCManager ); return false; } @@ -113,20 +165,30 @@ // stop service if its running if ( ::ControlService( schService, SERVICE_CONTROL_STOP, &serviceStatus ) ) { + cerr << "Service " << toUtf8String(serviceName) << " is currently running. Stopping service." 
<< endl;
             while ( ::QueryServiceStatus( schService, &serviceStatus ) ) {
                 if ( serviceStatus.dwCurrentState == SERVICE_STOP_PENDING )
-            {
-                Sleep( 1000 );
-            }
-            else { break; }
+                {
+                    Sleep( 1000 );
+                }
+                else { break; }
             }
+            cerr << "Service stopped." << endl;
         }
+        cerr << "Deleting service " << toUtf8String(serviceName) << "." << endl;
         bool serviceRemoved = ::DeleteService( schService );
         ::CloseServiceHandle( schService );
         ::CloseServiceHandle( schSCManager );
+        if (serviceRemoved) {
+            cerr << "Service deleted successfully." << endl;
+        }
+        else {
+            cerr << "Failed to delete service." << endl;
+        }
+
         return serviceRemoved;
     }
@@ -136,14 +198,14 @@
         SERVICE_TABLE_ENTRY dispTable[] = {
             { (LPTSTR)serviceName.c_str(), (LPSERVICE_MAIN_FUNCTION)ServiceController::initService },
-            { null, null }
+            { NULL, NULL }
         };
         return StartServiceCtrlDispatcher( dispTable );
     }
     bool ServiceController::reportStatus( DWORD reportState, DWORD waitHint ) {
-        if ( _statusHandle == null )
+        if ( _statusHandle == NULL )
             return false;
         static DWORD checkPoint = 1;
diff -Nru mongodb-1.4.4/util/ntservice.h mongodb-1.6.3/util/ntservice.h
--- mongodb-1.4.4/util/ntservice.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/util/ntservice.h	2010-09-24 10:02:42.000000000 -0700
@@ -29,7 +29,7 @@
         ServiceController();
         virtual ~ServiceController() {}
-        static bool installService( const std::wstring& serviceName, const std::wstring& displayName, const std::wstring& serviceDesc, int argc, char* argv[] );
+        static bool installService( const std::wstring& serviceName, const std::wstring& displayName, const std::wstring& serviceDesc, const std::wstring& serviceUser, const std::wstring& servicePassword, const std::string dbpath, int argc, char* argv[] );
         static bool removeService( const std::wstring& serviceName );
         static bool startService( const std::wstring& serviceName, ServiceCallback startService );
         static bool reportStatus( DWORD reportState, DWORD waitHint = 0 );
diff -Nru mongodb-1.4.4/util/optime.h mongodb-1.6.3/util/optime.h
--- mongodb-1.4.4/util/optime.h	2010-06-30 00:03:29.000000000 -0700
+++ mongodb-1.6.3/util/optime.h	2010-09-24 10:02:42.000000000 -0700
@@ -20,15 +20,20 @@
 #include "../db/concurrency.h"
 namespace mongo {
-    void exitCleanly( int code );
+    void exitCleanly( ExitCode code );
-    /* Operation sequence #.  A combination of current second plus an ordinal value.
-     */
     struct ClockSkewException : public DBException {
-        virtual const char* what() const throw() { return "clock skew exception"; }
-        virtual int getCode(){ return 20001; }
+        ClockSkewException() : DBException( "clock skew exception" , 20001 ){}
     };
-
+
+    /* replsets use RSOpTime.
+       M/S uses OpTime.
+       But this is usable from both.
+       */
+    typedef unsigned long long ReplTime;
+
+    /* Operation sequence #.  A combination of current second plus an ordinal value.
+     */
 #pragma pack(4)
     class OpTime {
         unsigned i;
@@ -44,8 +49,8 @@
         OpTime(Date_t date) {
             reinterpret_cast<unsigned long long&>(*this) = date.millis;
         }
-        OpTime(unsigned long long date) {
-            reinterpret_cast<unsigned long long&>(*this) = date;
+        OpTime(ReplTime x) {
+            reinterpret_cast<unsigned long long&>(*this) = x;
         }
         OpTime(unsigned a, unsigned b) {
             secs = a;
@@ -87,29 +92,35 @@
           bytes of overhead.
        */
         unsigned long long asDate() const {
-            return *((unsigned long long *) &i);
+            return reinterpret_cast<const unsigned long long*>(&i)[0];
        }
-        // unsigned long long& asDate() { return *((unsigned long long *) &i); }
-
-        bool isNull() {
-            return secs == 0;
+        long long asLL() const {
+            return reinterpret_cast<const long long*>(&i)[0];
        }
+        bool isNull() const { return secs == 0; }
+
         string toStringLong() const {
             char buf[64];
             time_t_to_String(secs, buf);
             stringstream ss;
-            ss << buf << ' ';
+            ss << time_t_to_String_short(secs) << ' ';
             ss << hex << secs << ':' << i;
             return ss.str();
         }
+        string toStringPretty() const {
+            stringstream ss;
+            ss << time_t_to_String_short(secs) << ':' << hex << i;
+            return ss.str();
+        }
+
         string toString() const {
             stringstream ss;
             ss << hex << secs << ':' << i;
             return ss.str();
         }
-        operator string() const { return toString(); }
+
         bool operator==(const OpTime& r) const {
             return i == r.i && secs == r.secs;
         }
@@ -121,6 +132,15 @@
                 return secs < r.secs;
             return i < r.i;
         }
+        bool operator<=(const OpTime& r) const {
+            return *this < r || *this == r;
+        }
+        bool operator>(const OpTime& r) const {
+            return !(*this <= r);
+        }
+        bool operator>=(const OpTime& r) const {
+            return !(*this < r);
+        }
     };
 #pragma pack()
diff -Nru mongodb-1.4.4/util/password.cpp mongodb-1.6.3/util/password.cpp
--- mongodb-1.4.4/util/password.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/password.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "pch.h" +#include "password.h" +#include + +#ifndef _WIN32 +#include +#endif + +using namespace std; + +namespace mongo { + + string askPassword() { + + std::string password; + cout << "Enter password: "; +#ifndef _WIN32 + const int stdinfd = 0; + termios termio; + tcflag_t old = 0; + if ( isatty( stdinfd ) ) { + int i = tcgetattr( stdinfd, &termio ); + if( i == -1 ) { + cerr << "Cannot get terminal attributes " << errnoWithDescription() << endl; + return string(); + } + old = termio.c_lflag; + termio.c_lflag &= ~ECHO; + i = tcsetattr( stdinfd, TCSANOW, &termio ); + if( i == -1 ) { + cerr << "Cannot set terminal attributes " << errnoWithDescription() << endl; + return string(); + } + } + + cin >> password; + + if ( isatty( stdinfd ) ) { + termio.c_lflag = old; + int i = tcsetattr( stdinfd, TCSANOW, &termio ); + if( i == -1 ) { + cerr << "Cannot set terminal attributes " << errnoWithDescription() << endl; + return string(); + } + } +#else + HANDLE stdinh = GetStdHandle( STD_INPUT_HANDLE ); + if ( stdinh == INVALID_HANDLE_VALUE) { + cerr << "Cannot get stdin handle " << GetLastError() << "\n"; + return string(); + } + + DWORD old; + if ( !GetConsoleMode( stdinh, &old ) ) { + cerr << "Cannot get console mode " << GetLastError() << "\n"; + return string(); + } + + DWORD noecho = ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT; + if ( !SetConsoleMode( stdinh, noecho ) ) { + cerr << "Cannot set console mode " << GetLastError() << "\n"; + return string(); + } + + cin >> password; + + if ( !SetConsoleMode( stdinh, old ) ) { + cerr << "Cannot set console mode " << GetLastError() << "\n"; + return string(); + } + cin.get(); +#endif + cout << "\n"; + return password; + } +} diff -Nru mongodb-1.4.4/util/password.h mongodb-1.6.3/util/password.h --- mongodb-1.4.4/util/password.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/password.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,61 @@ +/* + * Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include +#include + +namespace mongo { + + struct PasswordValue : public boost::program_options::typed_value { + + PasswordValue( std::string* val ) + : boost::program_options::typed_value( val ) { } + + unsigned min_tokens() const { + return 0; + } + + unsigned max_tokens() const { + return 1; + } + + bool is_required() const { + return false; + } + + void xparse( boost::any& value_store, + const std::vector& new_tokens ) const { + if ( !value_store.empty() ) +#if BOOST_VERSION >= 104200 + boost::throw_exception( boost::program_options::validation_error( boost::program_options::validation_error::multiple_values_not_allowed ) ); +#else + boost::throw_exception( boost::program_options::validation_error( "multiple values not allowed" ) ); +#endif + else if ( !new_tokens.empty() ) + boost::program_options::typed_value::xparse + (value_store, new_tokens); + else + value_store = std::string(); + } + + }; + + std::string askPassword(); + +} diff -Nru mongodb-1.4.4/util/processinfo.cpp mongodb-1.6.3/util/processinfo.cpp --- mongodb-1.4.4/util/processinfo.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/processinfo.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,47 @@ +// processinfo.cpp + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" +#include "processinfo.h" + +#include +using namespace std; + +namespace mongo { + + class PidFileWiper { + public: + ~PidFileWiper(){ + ofstream out( path.c_str() , ios_base::out ); + out.close(); + } + + void write( const string& p ){ + path = p; + ofstream out( path.c_str() , ios_base::out ); + out << getpid() << endl; + out.close(); + } + + string path; + } pidFileWiper; + + void writePidFile( const string& path ){ + pidFileWiper.write( path ); + } + +} diff -Nru mongodb-1.4.4/util/processinfo_darwin.cpp mongodb-1.6.3/util/processinfo_darwin.cpp --- mongodb-1.4.4/util/processinfo_darwin.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/processinfo_darwin.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -#include "../stdafx.h" +#include "../pch.h" #include "processinfo.h" #include "log.h" @@ -108,7 +108,7 @@ start = start - ( (unsigned long long)start % pageSize ); char x = 0; if ( mincore( start , 128 , &x ) ){ - log() << "mincore failed: " << OUTPUT_ERRNO << endl; + log() << "mincore failed: " << errnoWithDescription() << endl; return 1; } return x & 0x1; diff -Nru mongodb-1.4.4/util/processinfo.h mongodb-1.6.3/util/processinfo.h --- mongodb-1.4.4/util/processinfo.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/processinfo.h 2010-09-24 10:02:42.000000000 -0700 @@ -18,6 +18,7 @@ #pragma once #include +#include #ifndef _WIN32 #include @@ -58,5 +59,7 @@ private: pid_t _pid; }; - + + void writePidFile( const std::string& path ); + } diff -Nru mongodb-1.4.4/util/processinfo_linux2.cpp mongodb-1.6.3/util/processinfo_linux2.cpp --- mongodb-1.4.4/util/processinfo_linux2.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/processinfo_linux2.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -40,11 +40,10 @@ FILE * f = fopen( name , "r"); if ( ! f ){ stringstream ss; - ss << "couldn't open [" << name << "] " << OUTPUT_ERRNO; + ss << "couldn't open [" << name << "] " << errnoWithDescription(); string s = ss.str(); - msgasserted( 13276 , s.c_str() ); + msgassertedNoTrace( 13276 , s.c_str() ); } - int found = fscanf(f, "%d %s %c " "%d %d %d %d %d " @@ -232,7 +231,7 @@ start = start - ( (unsigned long long)start % pageSize ); unsigned char x = 0; if ( mincore( start , 128 , &x ) ){ - log() << "mincore failed: " << OUTPUT_ERRNO << endl; + log() << "mincore failed: " << errnoWithDescription() << endl; return 1; } return x & 0x1; diff -Nru mongodb-1.4.4/util/processinfo_none.cpp mongodb-1.6.3/util/processinfo_none.cpp --- mongodb-1.4.4/util/processinfo_none.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/processinfo_none.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "processinfo.h" #include diff -Nru mongodb-1.4.4/util/processinfo_win32.cpp mongodb-1.6.3/util/processinfo_win32.cpp --- mongodb-1.4.4/util/processinfo_win32.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/processinfo_win32.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "processinfo.h" #include diff -Nru mongodb-1.4.4/util/queue.h mongodb-1.6.3/util/queue.h --- mongodb-1.4.4/util/queue.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/queue.h 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// queue.h +// @file queue.h /* Copyright 2009 10gen Inc. * @@ -17,7 +17,7 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include "../util/goodies.h" #include @@ -29,6 +29,8 @@ */ template class BlockingQueue : boost::noncopyable { public: + BlockingQueue() : _lock("BlockingQueue") { } + void push(T const& t){ scoped_lock l( _lock ); _queue.push( t ); diff -Nru mongodb-1.4.4/util/ramlog.h mongodb-1.6.3/util/ramlog.h --- mongodb-1.4.4/util/ramlog.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/ramlog.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,142 @@ +// log.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "log.h"
+#include "mongoutils/html.h"
+
+namespace mongo {
+
+    class RamLog : public Tee {
+        enum {
+            N = 128,
+            C = 256
+        };
+        char lines[N][C];
+        unsigned h, n;
+
+    public:
+        RamLog() {
+            h = 0; n = 0;
+            for( int i = 0; i < N; i++ )
+                lines[i][C-1] = 0;
+        }
+
+        virtual void write(LogLevel ll, const string& str) {
+            char *p = lines[(h+n)%N];
+            if( str.size() < C )
+                strcpy(p, str.c_str());
+            else
+                memcpy(p, str.c_str(), C-1);
+            if( n < N ) n++;
+            else h = (h+1) % N;
+        }
+
+        void get( vector<const char*>& v) const {
+            for( unsigned x=0, i=h; x++ < n; i=(i+1)%N )
+                v.push_back(lines[i]);
+        }
+
+        static int repeats(const vector<const char *>& v, int i) {
+            for( int j = i-1; j >= 0 && j+8 > i; j-- ) {
+                if( strcmp(v[i]+20,v[j]+20) == 0 ) {
+                    for( int x = 1; ; x++ ) {
+                        if( j+x == i ) return j;
+                        if( i+x>=(int) v.size() ) return -1;
+                        if( strcmp(v[i+x]+20,v[j+x]+20) ) return -1;
+                    }
+                    return -1;
+                }
+            }
+            return -1;
+        }
+
+
+        static string clean(const vector<const char *>& v, int i, string line="") {
+            if( line.empty() ) line = v[i];
+            if( i > 0 && strncmp(v[i], v[i-1], 11) == 0 )
+                return string("           ") + line.substr(11);
+            return v[i];
+        }
+
+        static string color(string line) {
+            string s = str::after(line, "replSet ");
+            if( str::startsWith(s, "warning") || str::startsWith(s, "error") )
+                return html::red(line);
+            if( str::startsWith(s, "info") ) {
+                if( str::endsWith(s, " up\n") )
+                    return html::green(line);
+                else if( str::contains(s, " down ") || str::endsWith(s, " down\n") )
+                    return html::yellow(line);
+                return line; //html::blue(line);
+            }
+
+            return line;
+        }
+
+        /* turn http:... into an anchor */
+        string linkify(const char *s) {
+            const char *p = s;
+            const char *h = strstr(p, "http://");
+            if( h == 0 ) return s;
+
+            const char *sp = h + 7;
+            while( *sp && *sp != ' ' ) sp++;
+
+            string url(h, sp-h);
+            stringstream ss;
+            ss << string(s, h-s) << "<a href=\"" << url << "\">" << url << "</a>" << sp;
+            return ss.str();
+        }
+
+        void toHTML(stringstream& s) {
+            vector<const char*> v;
+            get( v );
+
+            bool first = true;
+            s << "<pre>\n";
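+            // walk the buffered lines oldest-first; each one is either emitted
+            // (colorized, with URLs turned into anchors) or, when it starts a
+            // run identical to the lines just above it, collapsed into dots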
    +            for( int i = 0; i < (int)v.size(); i++ ) {
    +                assert( strlen(v[i]) > 20 );
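+                // every line is assumed to carry a 20-char date/time prefix;
+                // the comparisons below skip it and compare message text only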
    +                int r = repeats(v, i);
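+                // r is the index where an identical earlier run begins
+                // (searched up to 8 lines back), or -1 if this line is unique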
    +                if( r < 0 ) {
    +                    s << color( linkify( clean(v,i).c_str() ) );
    +                } 
    +                else {
    +                    stringstream x;
    +                    x << string(v[i], 0, 20);
    +                    int nr = (i-r);
    +                    int last = i+nr-1;
    +                    for( ; r < i ; r++ ) x << '.';
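+                    // x now holds this line's timestamp plus one dot per
+                    // collapsed repeat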
    +                    if( 1 ) { 
    +                        stringstream r; 
    +                        if( nr == 1 ) r << "repeat last line";
    +                        else r << "repeats last " << nr << " lines; ends " << string(v[last]+4,0,15);
    +                        first = false; s << html::a("", r.str(), clean(v,i,x.str()));
    +                    }
    +                    else s << x.str();
    +                    s << '\n';
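+                    // skip past the lines that were just collapsed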
    +                    i = last;
    +                }
    +            }
    +            s << "
+            s << "</pre>\n";
+        }
+
+    };
+
+}
diff -Nru mongodb-1.4.4/util/ramstore.cpp mongodb-1.6.3/util/ramstore.cpp
--- mongodb-1.4.4/util/ramstore.cpp	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/ramstore.cpp	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,93 @@
+/**
+*    Copyright (C) 2008 10gen Inc.
+*
+*    This program is free software: you can redistribute it and/or modify
+*    it under the terms of the GNU Affero General Public License, version 3,
+*    as published by the Free Software Foundation.
+*
+*    This program is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*    GNU Affero General Public License for more details.
+*
+*    You should have received a copy of the GNU Affero General Public License
+*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include "mmap.h"
+
+namespace mongo {
+
+    //extern bool checkNsFilesOnLoad;
+
+static set<RamStoreFile*> files;
+
+void RamStoreFile::grow(int offset, int len) {
+    cout << "GROW ofs:" << offset << " len:" << len;
+    assert( len > 0 );
+    Node& n = _m[offset];
+    cout << " oldlen:" << n.len << endl;
+    assert( n.len > 0 );
+    if( len > n.len ) {
+        n.p = (char *) realloc(n.p, len);
+        memset(((char *)n.p) + n.len, 0, len - n.len);
+        n.len = len;
+    }
+}
+
+/* maxLen can be -1 for existing data */
+void* RamStoreFile::at(int offset, int maxLen) {
+    if( offset != _last ) {
+        if( _m.count(_last) ) {
+            _m[_last].check();
+            if( !(offset < _last || offset >= _last + _m[_last].len) ) {
+                cout << offset << ' ' << _last << ' ' << _m[_last].len << endl;
+                assert(false);
+            }
+        }
+    }
+    _last = offset;
+
+    Node& n = _m[offset];
+    if( n.len == 0 ) {
+        // create
+        if( strstr(name, ".ns") == 0 )
+            cout << "CREATE " << name << " ofs:" << offset << " len:" << maxLen << endl;
+        assert( maxLen >= 0 );
+        n.p = (char *) calloc(maxLen+1, 1);
+        n.len = maxLen;
+    }
+    assert( n.len >= maxLen );
+    n.check();
+    return n.p;
+    }
+
+void RamStoreFile::Node::check() {
+    assert( p[len] == 0 );
+}
+
+void RamStoreFile::check() {
+    for( std::map<int,Node>::iterator i = _m.begin(); i != _m.end(); i++ ) {
+        i->second.check();
+    }
+}
+
+void RamStoreFile::validate() {
+    for( set<RamStoreFile*>::iterator i = files.begin(); i != files.end(); i++ ) {
+        (*i)->check();
+    }
+}
+
+RamStoreFile::~RamStoreFile() {
+    check();
+    files.erase(this);
+}
+
+RamStoreFile::RamStoreFile() : _len(0) {
+    // checkNsFilesOnLoad = false;
+    files.insert(this);
+}
+
+}
+
diff -Nru mongodb-1.4.4/util/ramstore.h mongodb-1.6.3/util/ramstore.h
--- mongodb-1.4.4/util/ramstore.h	1969-12-31 16:00:00.000000000 -0800
+++ mongodb-1.6.3/util/ramstore.h	2010-09-24 10:02:42.000000000 -0700
@@ -0,0 +1,86 @@
+// ramstore.h
+
+// mmap.h
+
+/* Copyright 2009 10gen Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +extern bool checkNsFilesOnLoad; + +class RamStoreFile : public MongoFile { + char name[256]; + struct Node { + char *p; + int len; + Node() : len(0) { } + void check(); + }; + std::map _m; + long _len; + + static void validate(); + void check(); + + int _last; + + void grow(int offset, int len); + + /* maxLen can be -1 for existing data */ + void* at(int offset, int maxLen); + +protected: + virtual void close() { + cout << "ramstore dealloc not yet implemented" << endl; + if( _len ) { + _len = 0; + } + } + virtual void flush(bool sync) { } + +public: + ~RamStoreFile(); + RamStoreFile(); + + virtual long length() { return _len; } + + class Pointer { + RamStoreFile* _f; + friend class RamStoreFile; + public: + void* at(int offset, int len) { + assert( len <= /*MaxBSONObjectSize*/4*1024*1024 + 128 ); + return _f->at(offset,len); + } + void grow(int offset, int len) { + assert( len <= /*MaxBSONObjectSize*/4*1024*1024 + 128 ); + _f->grow(offset,len); + } + bool isNull() const { return _f == 0; } + }; + + Pointer map( const char *filename ) { + assert(false); return Pointer(); + } + Pointer map(const char *_filename, long &length, int options=0) { + strncpy(name, _filename, sizeof(name)-1); + Pointer p; + p._f = this; + return p; + } + + static bool exists(boost::filesystem::path p) { + return false; + } +}; diff -Nru mongodb-1.4.4/util/sock.cpp mongodb-1.6.3/util/sock.cpp --- mongodb-1.4.4/util/sock.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/sock.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -15,31 +15,134 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "sock.h" namespace mongo { - static mongo::mutex sock_mutex; + static mongo::mutex sock_mutex("sock_mutex"); + + static bool ipv6 = false; + void enableIPv6(bool state) { ipv6 = state; } + bool IPv6Enabled() { return ipv6; } + + SockAddr::SockAddr(int sourcePort) { + memset(as().sin_zero, 0, sizeof(as().sin_zero)); + as().sin_family = AF_INET; + as().sin_port = htons(sourcePort); + as().sin_addr.s_addr = htonl(INADDR_ANY); + addressSize = sizeof(sockaddr_in); + } + + SockAddr::SockAddr(const char * iporhost , int port) { + if (!strcmp(iporhost, "localhost")) + iporhost = "127.0.0.1"; + + if (strchr(iporhost, '/')){ +#ifdef _WIN32 + uassert(13080, "no unix socket support on windows", false); +#endif + uassert(13079, "path to unix socket too long", strlen(iporhost) < sizeof(as().sun_path)); + as().sun_family = AF_UNIX; + strcpy(as().sun_path, iporhost); + addressSize = sizeof(sockaddr_un); + }else{ + addrinfo* addrs = NULL; + addrinfo hints; + memset(&hints, 0, sizeof(addrinfo)); + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_ADDRCONFIG; + hints.ai_family = (IPv6Enabled() ? 
AF_UNSPEC : AF_INET); + + stringstream ss; + ss << port; + int ret = getaddrinfo(iporhost, ss.str().c_str(), &hints, &addrs); + if (ret){ + log() << "getaddrinfo(\"" << iporhost << "\") failed: " << gai_strerror(ret) << endl; + *this = SockAddr(port); + }else{ + //TODO: handle other addresses in linked list; + assert(addrs->ai_addrlen <= sizeof(sa)); + memcpy(&sa, addrs->ai_addr, addrs->ai_addrlen); + addressSize = addrs->ai_addrlen; + freeaddrinfo(addrs); + } + } + } + + bool SockAddr::isLocalHost() const { + switch (getType()){ + case AF_INET: return getAddr() == "127.0.0.1"; + case AF_INET6: return getAddr() == "::1"; + case AF_UNIX: return true; + default: return false; + } + assert(false); + return false; + } string hostbyname(const char *hostname) { - static string unknown = "0.0.0.0"; - if ( unknown == hostname ) - return unknown; + string addr = SockAddr(hostname, 0).getAddr(); + if (addr == "0.0.0.0") + return ""; + else + return addr; + } - scoped_lock lk(sock_mutex); -#if defined(_WIN32) - if( inet_addr(hostname) != INADDR_NONE ) - return hostname; -#else - struct in_addr temp; - if ( inet_aton( hostname, &temp ) ) - return hostname; -#endif - struct hostent *h; - h = gethostbyname(hostname); - if ( h == 0 ) return ""; - return inet_ntoa( *((struct in_addr *)(h->h_addr)) ); + class UDPConnection { + public: + UDPConnection() { + sock = 0; + } + ~UDPConnection() { + if ( sock ) { + closesocket(sock); + sock = 0; + } + } + bool init(const SockAddr& myAddr); + int recvfrom(char *buf, int len, SockAddr& sender); + int sendto(char *buf, int len, const SockAddr& EndPoint); + int mtu(const SockAddr& sa) { + return sa.isLocalHost() ? 16384 : 1480; + } + + SOCKET sock; + }; + + inline int UDPConnection::recvfrom(char *buf, int len, SockAddr& sender) { + return ::recvfrom(sock, buf, len, 0, sender.raw(), &sender.addressSize); + } + + inline int UDPConnection::sendto(char *buf, int len, const SockAddr& EndPoint) { + if ( 0 && rand() < (RAND_MAX>>4) ) { + out() << " NOTSENT "; + return 0; + } + return ::sendto(sock, buf, len, 0, EndPoint.raw(), EndPoint.addressSize); + } + + inline bool UDPConnection::init(const SockAddr& myAddr) { + sock = socket(myAddr.getType(), SOCK_DGRAM, IPPROTO_UDP); + if ( sock == INVALID_SOCKET ) { + out() << "invalid socket? 
" << errnoWithDescription() << endl; + return false; + } + if ( ::bind(sock, myAddr.raw(), myAddr.addressSize) != 0 ) { + out() << "udp init failed" << endl; + closesocket(sock); + sock = 0; + return false; + } + socklen_t optLen; + int rcvbuf; + if (getsockopt(sock, + SOL_SOCKET, + SO_RCVBUF, + (char*)&rcvbuf, + &optLen) != -1) + out() << "SO_RCVBUF:" << rcvbuf << endl; + return true; } void sendtest() { @@ -50,7 +153,7 @@ if ( c.init(me) ) { char buf[256]; out() << "sendto: "; - out() << c.sendto(buf, sizeof(buf), dest) << " " << OUTPUT_ERRNO << endl; + out() << c.sendto(buf, sizeof(buf), dest) << " " << errnoWithDescription() << endl; } out() << "end\n"; } @@ -63,147 +166,44 @@ if ( c.init(me) ) { char buf[256]; out() << "recvfrom: "; - out() << c.recvfrom(buf, sizeof(buf), sender) << " " << OUTPUT_ERRNO << endl; + out() << c.recvfrom(buf, sizeof(buf), sender) << " " << errnoWithDescription() << endl; } out() << "end listentest\n"; } void xmain(); - struct SockStartupTests { - SockStartupTests() { + #if defined(_WIN32) - WSADATA d; - if ( WSAStartup(MAKEWORD(2,2), &d) != 0 ) { - out() << "ERROR: wsastartup failed " << OUTPUT_ERRNO << endl; - problem() << "ERROR: wsastartup failed " << OUTPUT_ERRNO << endl; - dbexit( EXIT_NTSERVICE_ERROR ); + namespace { + struct WinsockInit { + WinsockInit() { + WSADATA d; + if ( WSAStartup(MAKEWORD(2,2), &d) != 0 ) { + out() << "ERROR: wsastartup failed " << errnoWithDescription() << endl; + problem() << "ERROR: wsastartup failed " << errnoWithDescription() << endl; + dbexit( EXIT_NTSERVICE_ERROR ); + } } -#endif - //out() << "ntohl:" << ntohl(256) << endl; - //sendtest(); - //listentest(); - } - } sstests; - -#if 0 - void smain() { - - WSADATA wsaData; - SOCKET RecvSocket; - sockaddr_in RecvAddr; - int Port = 27015; - char RecvBuf[1024]; - int BufLen = 1024; - sockaddr_in SenderAddr; - int SenderAddrSize = sizeof(SenderAddr); - - //----------------------------------------------- - // Initialize Winsock - WSAStartup(MAKEWORD(2,2), &wsaData); - - //----------------------------------------------- - // Create a receiver socket to receive datagrams - RecvSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - prebindOptions( RecvSocket ); - - //----------------------------------------------- - // Bind the socket to any address and the specified port. - RecvAddr.sin_family = AF_INET; - RecvAddr.sin_port = htons(Port); - RecvAddr.sin_addr.s_addr = htonl(INADDR_ANY); - - ::bind(RecvSocket, (SOCKADDR *) &RecvAddr, sizeof(RecvAddr)); - - //----------------------------------------------- - // Call the recvfrom function to receive datagrams - // on the bound socket. - printf("Receiving datagrams...\n"); - recvfrom(RecvSocket, - RecvBuf, - BufLen, - 0, - (SOCKADDR *)&SenderAddr, - &SenderAddrSize); - - //----------------------------------------------- - // Close the socket when finished receiving datagrams - printf("Finished receiving. Closing socket.\n"); - closesocket(RecvSocket); - - //----------------------------------------------- - // Clean up and exit. 
- printf("Exiting.\n"); - WSACleanup(); - return; + } winsock_init; } - - - - - void xmain() { - - WSADATA wsaData; - SOCKET RecvSocket; - sockaddr_in RecvAddr; - int Port = 27015; - char RecvBuf[1024]; - int BufLen = 1024; - sockaddr_in SenderAddr; - int SenderAddrSize = sizeof(SenderAddr); - - //----------------------------------------------- - // Initialize Winsock - WSAStartup(MAKEWORD(2,2), &wsaData); - - //----------------------------------------------- - // Create a receiver socket to receive datagrams - - RecvSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - prebindOptions( RecvSocket ); - - //----------------------------------------------- - // Bind the socket to any address and the specified port. - RecvAddr.sin_family = AF_INET; - RecvAddr.sin_port = htons(Port); - RecvAddr.sin_addr.s_addr = htonl(INADDR_ANY); - - SockAddr a(Port); - ::bind(RecvSocket, (SOCKADDR *) &a.sa, a.addressSize); -// bind(RecvSocket, (SOCKADDR *) &RecvAddr, sizeof(RecvAddr)); - - SockAddr b; - - //----------------------------------------------- - // Call the recvfrom function to receive datagrams - // on the bound socket. - printf("Receiving datagrams...\n"); - recvfrom(RecvSocket, - RecvBuf, - BufLen, - 0, - (SOCKADDR *) &b.sa, &b.addressSize); -// (SOCKADDR *)&SenderAddr, -// &SenderAddrSize); - - //----------------------------------------------- - // Close the socket when finished receiving datagrams - printf("Finished receiving. Closing socket.\n"); - closesocket(RecvSocket); - - //----------------------------------------------- - // Clean up and exit. - printf("Exiting.\n"); - WSACleanup(); - return; - } - #endif + SockAddr unknownAddress( "0.0.0.0", 0 ); + ListeningSockets* ListeningSockets::_instance = new ListeningSockets(); ListeningSockets* ListeningSockets::get(){ return _instance; } + + string getHostNameCached(){ + static string host; + if ( host.empty() ){ + string s = getHostName(); + host = s; + } + return host; + } } // namespace mongo diff -Nru mongodb-1.4.4/util/sock.h mongodb-1.6.3/util/sock.h --- mongodb-1.4.4/util/sock.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/sock.h 2010-09-24 10:02:42.000000000 -0700 @@ -17,32 +17,43 @@ #pragma once -#include "../stdafx.h" +#include "../pch.h" #include #include #include "goodies.h" - -#ifdef _WIN32 -#include -#include -#endif +#include "../db/jsobj.h" namespace mongo { + const int SOCK_FAMILY_UNKNOWN_ERROR=13078; + #if defined(_WIN32) + typedef short sa_family_t; typedef int socklen_t; inline int getLastError() { return WSAGetLastError(); } + inline const char* gai_strerror(int code) { + return ::gai_strerrorA(code); + } inline void disableNagle(int sock) { int x = 1; if ( setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &x, sizeof(x)) ) out() << "ERROR: disableNagle failed" << endl; + if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) ) + out() << "ERROR: SO_KEEPALIVE failed" << endl; } inline void prebindOptions( int sock ) { } + + // This won't actually be used on windows + struct sockaddr_un { + short sun_family; + char sun_path[108]; // length from unix header + }; + #else } // namespace mongo @@ -50,11 +61,19 @@ #include #include #include +#include #include #include #include #include #include +#ifdef __openbsd__ +# include +#endif + +#ifndef AI_ADDRCONFIG +# define AI_ADDRCONFIG 0 +#endif namespace mongo { @@ -74,7 +93,12 @@ #endif if ( setsockopt(sock, level, TCP_NODELAY, (char *) &x, sizeof(x)) ) - log() << "ERROR: disableNagle failed" << endl; + log() << "ERROR: disableNagle failed: " << 
errnoWithDescription() << endl; + +#ifdef SO_KEEPALIVE + if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) ) + log() << "ERROR: SO_KEEPALIVE failed: " << errnoWithDescription() << endl; +#endif } inline void prebindOptions( int sock ) { @@ -87,163 +111,149 @@ #endif - inline void setSockReceiveTimeout(int sock, int secs) { -// todo - finish - works? + inline string makeUnixSockPath(int port){ + return "/tmp/mongodb-" + BSONObjBuilder::numStr(port) + ".sock"; + } + + inline void setSockTimeouts(int sock, int secs) { struct timeval tv; - tv.tv_sec = 0;//secs; - tv.tv_usec = 1000; - int rc = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv)); - if ( rc ) { - out() << "ERROR: setsockopt RCVTIMEO failed rc:" << rc << " " << OUTPUT_ERRNO << " secs:" << secs << " sock:" << sock << endl; - } + tv.tv_sec = secs; + tv.tv_usec = 0; + bool report = logLevel > 3; // solaris doesn't provide these + DEV report = true; + bool ok = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0; + if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl; + ok = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0; + DEV if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl; } // If an ip address is passed in, just return that. If a hostname is passed // in, look up its ip and return that. Returns "" on failure. string hostbyname(const char *hostname); + void enableIPv6(bool state=true); + bool IPv6Enabled(); + struct SockAddr { SockAddr() { - addressSize = sizeof(sockaddr_in); + addressSize = sizeof(sa); memset(&sa, 0, sizeof(sa)); + sa.ss_family = AF_UNSPEC; } SockAddr(int sourcePort); /* listener side */ SockAddr(const char *ip, int port); /* EndPoint (remote) side, or if you want to specify which interface locally */ - struct sockaddr_in sa; - socklen_t addressSize; - - bool isLocalHost() const { -#if defined(_WIN32) - return sa.sin_addr.S_un.S_addr == 0x100007f; -#else - return sa.sin_addr.s_addr == 0x100007f; -#endif - } - - string toString() const{ - stringstream out; - out << inet_ntoa(sa.sin_addr) << ':' - << ntohs(sa.sin_port); - return out.str(); - } - - operator string() const{ - return toString(); + template + T& as() { return *(T*)(&sa); } + template + const T& as() const { return *(const T*)(&sa); } + + string toString(bool includePort=true) const{ + string out = getAddr(); + if (includePort && getType() != AF_UNIX && getType() != AF_UNSPEC) + out += ':' + BSONObjBuilder::numStr(getPort()); + return out; + } + + // returns one of AF_INET, AF_INET6, or AF_UNIX + sa_family_t getType() const { + return sa.ss_family; + } + + unsigned getPort() const { + switch (getType()){ + case AF_INET: return ntohs(as().sin_port); + case AF_INET6: return ntohs(as().sin6_port); + case AF_UNIX: return 0; + case AF_UNSPEC: return 0; + default: massert(SOCK_FAMILY_UNKNOWN_ERROR, "unsupported address family", false); return 0; + } } - unsigned getPort() { - return sa.sin_port; + string getAddr() const { + switch (getType()){ + case AF_INET: + case AF_INET6: { + const int buflen=128; + char buffer[buflen]; + int ret = getnameinfo(raw(), addressSize, buffer, buflen, NULL, 0, NI_NUMERICHOST); + massert(13082, gai_strerror(ret), ret == 0); + return buffer; + } + + case AF_UNIX: return (addressSize > 2 ? 
as().sun_path : "anonymous unix socket"); + case AF_UNSPEC: return "(NONE)"; + default: massert(SOCK_FAMILY_UNKNOWN_ERROR, "unsupported address family", false); return ""; + } } - bool localhost() const { return inet_addr( "127.0.0.1" ) == sa.sin_addr.s_addr; } + bool isLocalHost() const; bool operator==(const SockAddr& r) const { - return sa.sin_addr.s_addr == r.sa.sin_addr.s_addr && - sa.sin_port == r.sa.sin_port; + if (getType() != r.getType()) + return false; + + if (getPort() != r.getPort()) + return false; + + switch (getType()){ + case AF_INET: return as().sin_addr.s_addr == r.as().sin_addr.s_addr; + case AF_INET6: return memcmp(as().sin6_addr.s6_addr, r.as().sin6_addr.s6_addr, sizeof(in6_addr)) == 0; + case AF_UNIX: return strcmp(as().sun_path, r.as().sun_path) == 0; + case AF_UNSPEC: return true; // assume all unspecified addresses are the same + default: massert(SOCK_FAMILY_UNKNOWN_ERROR, "unsupported address family", false); + } } bool operator!=(const SockAddr& r) const { return !(*this == r); } bool operator<(const SockAddr& r) const { - if ( sa.sin_port >= r.sa.sin_port ) + if (getType() < r.getType()) + return true; + else if (getType() > r.getType()) return false; - return sa.sin_addr.s_addr < r.sa.sin_addr.s_addr; - } - }; - const int MaxMTU = 16384; + if (getPort() < r.getPort()) + return true; + else if (getPort() > r.getPort()) + return false; - class UDPConnection { - public: - UDPConnection() { - sock = 0; - } - ~UDPConnection() { - if ( sock ) { - closesocket(sock); - sock = 0; + switch (getType()){ + case AF_INET: return as().sin_addr.s_addr < r.as().sin_addr.s_addr; + case AF_INET6: return memcmp(as().sin6_addr.s6_addr, r.as().sin6_addr.s6_addr, sizeof(in6_addr)) < 0; + case AF_UNIX: return strcmp(as().sun_path, r.as().sun_path) < 0; + case AF_UNSPEC: return false; + default: massert(SOCK_FAMILY_UNKNOWN_ERROR, "unsupported address family", false); } } - bool init(const SockAddr& myAddr); - int recvfrom(char *buf, int len, SockAddr& sender); - int sendto(char *buf, int len, const SockAddr& EndPoint); - int mtu(const SockAddr& sa) { - return sa.isLocalHost() ? 16384 : 1480; - } - - SOCKET sock; - }; - inline int UDPConnection::recvfrom(char *buf, int len, SockAddr& sender) { - return ::recvfrom(sock, buf, len, 0, (sockaddr *) &sender.sa, &sender.addressSize); - } - - inline int UDPConnection::sendto(char *buf, int len, const SockAddr& EndPoint) { - if ( 0 && rand() < (RAND_MAX>>4) ) { - out() << " NOTSENT "; - // out() << curTimeMillis() << " .TEST: NOT SENDING PACKET" << endl; - return 0; - } - return ::sendto(sock, buf, len, 0, (sockaddr *) &EndPoint.sa, EndPoint.addressSize); - } + const sockaddr* raw() const {return (sockaddr*)&sa;} + sockaddr* raw() {return (sockaddr*)&sa;} - inline bool UDPConnection::init(const SockAddr& myAddr) { - sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( sock == INVALID_SOCKET ) { - out() << "invalid socket? 
" << OUTPUT_ERRNO << endl; - return false; - } - //out() << sizeof(sockaddr_in) << ' ' << myAddr.addressSize << endl; - if ( ::bind(sock, (sockaddr *) &myAddr.sa, myAddr.addressSize) != 0 ) { - out() << "udp init failed" << endl; - closesocket(sock); - sock = 0; - return false; - } - socklen_t optLen; - int rcvbuf; - if (getsockopt(sock, - SOL_SOCKET, - SO_RCVBUF, - (char*)&rcvbuf, - &optLen) != -1) - out() << "SO_RCVBUF:" << rcvbuf << endl; - return true; - } + socklen_t addressSize; + private: + struct sockaddr_storage sa; + }; - inline SockAddr::SockAddr(int sourcePort) { - memset(sa.sin_zero, 0, sizeof(sa.sin_zero)); - sa.sin_family = AF_INET; - sa.sin_port = htons(sourcePort); - sa.sin_addr.s_addr = htonl(INADDR_ANY); - addressSize = sizeof(sa); - } + extern SockAddr unknownAddress; // ( "0.0.0.0", 0 ) - inline SockAddr::SockAddr(const char * iporhost , int port) { - string ip = hostbyname( iporhost ); - memset(sa.sin_zero, 0, sizeof(sa.sin_zero)); - sa.sin_family = AF_INET; - sa.sin_port = htons(port); - sa.sin_addr.s_addr = inet_addr(ip.c_str()); - addressSize = sizeof(sa); - } + const int MaxMTU = 16384; inline string getHostName() { char buf[256]; int ec = gethostname(buf, 127); if ( ec || *buf == 0 ) { - log() << "can't get this server's hostname " << OUTPUT_ERRNO << endl; + log() << "can't get this server's hostname " << errnoWithDescription() << endl; return ""; } return buf; } + string getHostNameCached(); + class ListeningSockets { public: - ListeningSockets() : _sockets( new set() ){ - } - + ListeningSockets() : _mutex("ListeningSockets"), _sockets( new set() ) { } void add( int sock ){ scoped_lock lk( _mutex ); _sockets->insert( sock ); @@ -252,7 +262,6 @@ scoped_lock lk( _mutex ); _sockets->erase( sock ); } - void closeAll(){ set* s; { @@ -260,17 +269,13 @@ s = _sockets; _sockets = new set(); } - - for ( set::iterator i=s->begin(); i!=s->end(); i++ ){ + for ( set::iterator i=s->begin(); i!=s->end(); i++ ) { int sock = *i; - log() << "\t going to close listening socket: " << sock << endl; + log() << "closing listening socket: " << sock << endl; closesocket( sock ); - } - + } } - static ListeningSockets* get(); - private: mongo::mutex _mutex; set* _sockets; diff -Nru mongodb-1.4.4/util/stringutils.cpp mongodb-1.6.3/util/stringutils.cpp --- mongodb-1.4.4/util/stringutils.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/stringutils.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,44 @@ +// stringutils.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#include "pch.h" + +namespace mongo { + + void splitStringDelim( const string& str , vector* res , char delim ){ + if ( str.empty() ) + return; + + size_t beg = 0; + size_t pos = str.find( delim ); + while ( pos != string::npos ){ + res->push_back( str.substr( beg, pos - beg) ); + beg = ++pos; + pos = str.find( delim, beg ); + } + res->push_back( str.substr( beg ) ); + } + + void joinStringDelim( const vector& strs , string* res , char delim ){ + for ( vector::const_iterator it = strs.begin(); it != strs.end(); ++it ){ + if ( it !=strs.begin() ) res->push_back( delim ); + res->append( *it ); + } + } + +} // namespace mongo diff -Nru mongodb-1.4.4/util/stringutils.h mongodb-1.6.3/util/stringutils.h --- mongodb-1.4.4/util/stringutils.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/stringutils.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,43 @@ +// stringutils.h + +/* Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UTIL_STRING_UTILS_HEADER +#define UTIL_STRING_UTILS_HEADER + +namespace mongo { + + void splitStringDelim( const string& str , vector* res , char delim ); + + void joinStringDelim( const vector& strs , string* res , char delim ); + + inline string tolowerString( const string& input ){ + string::size_type sz = input.size(); + + boost::scoped_array line(new char[sz+1]); + char * copy = line.get(); + + for ( string::size_type i=0; i 0xF4) return false; // codepoint too large (< 0x10FFFF) + if (c == 0xC0 || c == 0xC1) return false; // codepoints <= 0x7F shouldn't be 2 bytes + + // still valid + left = ones-1; + } + } + if (left!=0) return false; // string ended mid-codepoint + return true; + } + + #if defined(_WIN32) + + std::string toUtf8String(const std::wstring& wide) + { + if (wide.size() > boost::integer_traits::const_max) + throw std::length_error( + "Wide string cannot be more than INT_MAX characters long."); + if (wide.size() == 0) + return ""; + + // Calculate necessary buffer size + int len = ::WideCharToMultiByte( + CP_UTF8, 0, wide.c_str(), static_cast(wide.size()), + NULL, 0, NULL, NULL); + + // Perform actual conversion + if (len > 0) + { + std::vector buffer(len); + len = ::WideCharToMultiByte( + CP_UTF8, 0, wide.c_str(), static_cast(wide.size()), + &buffer[0], static_cast(buffer.size()), NULL, NULL); + if (len > 0) + { + assert(len == static_cast(buffer.size())); + return std::string(&buffer[0], buffer.size()); + } + } + + throw boost::system::system_error( + ::GetLastError(), boost::system::system_category); + } + +#if defined(_UNICODE) + std::wstring toWideString(const char *s) { + std::basic_ostringstream buf; + buf << s; + return buf.str(); + } +#endif + + #endif + + struct TextUnitTest : public UnitTest { + void run() { + assert( parseLL("123") == 123 ); + assert( parseLL("-123000000000") == -123000000000LL ); + } + } textUnitTest; + +} + diff -Nru mongodb-1.4.4/util/text.h mongodb-1.6.3/util/text.h --- mongodb-1.4.4/util/text.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/text.h 
2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,142 @@ +// text.h +/* + * Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace mongo { + + class StringSplitter { + public: + StringSplitter( const char * big , const char * splitter ) + : _big( big ) , _splitter( splitter ){ + } + + bool more(){ + return _big[0]; + } + + string next(){ + const char * foo = strstr( _big , _splitter ); + if ( foo ){ + string s( _big , foo - _big ); + _big = foo + 1; + while ( *_big && strstr( _big , _splitter ) == _big ) + _big++; + return s; + } + + string s = _big; + _big += strlen( _big ); + return s; + } + + void split( vector& l ){ + while ( more() ){ + l.push_back( next() ); + } + } + + vector split(){ + vector l; + split( l ); + return l; + } + + static vector split( const string& big , const string& splitter ){ + StringSplitter ss( big.c_str() , splitter.c_str() ); + return ss.split(); + } + + static string join( vector& l , const string& split ){ + stringstream ss; + for ( unsigned i=0; i 0 ) + ss << split; + ss << l[i]; + } + return ss.str(); + } + + private: + const char * _big; + const char * _splitter; + }; + + /* This doesn't defend against ALL bad UTF8, but it will guarantee that the + * string can be converted to sequence of codepoints. However, it doesn't + * guarantee that the codepoints are valid. 
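+ * Worked example: "\xC3\xA9" (U+00E9) passes, since 0xC3 carries two
+ * leading one bits (announcing a 2-byte sequence) and 0xA9 is a valid
+ * 10xxxxxx continuation byte. "\xC0\x80" fails because 0xC0 would be an
+ * overlong 2-byte encoding of a codepoint <= 0x7F, and a string that
+ * stops right after 0xC3 fails because it ends mid-codepoint.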
+ */ + bool isValidUTF8(const char *s); + inline bool isValidUTF8(string s) { return isValidUTF8(s.c_str()); } + +#if defined(_WIN32) + + std::string toUtf8String(const std::wstring& wide); + + std::wstring toWideString(const char *s); + + /* like toWideString but UNICODE macro sensitive */ +# if !defined(_UNICODE) +#error temp error + inline std::string toNativeString(const char *s) { return s; } +# else + inline std::wstring toNativeString(const char *s) { return toWideString(s); } +# endif + +#endif + + // expect that n contains a base ten number and nothing else after it + // NOTE win version hasn't been tested directly + inline long long parseLL( const char *n ) { + long long ret; + uassert( 13307, "cannot convert empty string to long long", *n != 0 ); +#if !defined(_WIN32) + char *endPtr = 0; + errno = 0; + ret = strtoll( n, &endPtr, 10 ); + uassert( 13305, "could not convert string to long long", *endPtr == 0 && errno == 0 ); +#elif _MSC_VER>=1600 // 1600 is VS2k10 1500 is VS2k8 + size_t endLen = 0; + try { + ret = stoll( n, &endLen, 10 ); + } catch ( ... ) { + endLen = 0; + } + uassert( 13306, "could not convert string to long long", endLen != 0 && n[ endLen ] == 0 ); +#else // stoll() wasn't introduced until VS 2010. + char* endPtr = 0; + ret = _strtoi64( n, &endPtr, 10 ); + uassert( 13310, "could not convert string to long long", (*endPtr == 0) && (ret != _I64_MAX) && (ret != _I64_MIN) ); +#endif // !defined(_WIN32) + return ret; + } +} diff -Nru mongodb-1.4.4/util/thread_pool.cpp mongodb-1.6.3/util/thread_pool.cpp --- mongodb-1.4.4/util/thread_pool.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/thread_pool.cpp 1969-12-31 16:00:00.000000000 -0800 @@ -1,139 +0,0 @@ -/* threadpool.cpp -*/ - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
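A usage sketch for parseLL() from util/text.h above; the asserted values mirror TextUnitTest in util/text.cpp, and the failure cases raise uasserts instead of returning a partial result:

#include <cassert>
#include "util/text.h"

void parseLLExamples() {
    assert( mongo::parseLL("123") == 123 );
    assert( mongo::parseLL("-123000000000") == -123000000000LL );
    // mongo::parseLL("12x");  // would uassert 13305: trailing characters
    // mongo::parseLL("");     // would uassert 13307: empty string
}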
- */ - -#include "stdafx.h" -#include "thread_pool.h" -#include "mvar.h" - - -namespace mongo{ -namespace threadpool{ - -// Worker thread -class Worker : boost::noncopyable { -public: - explicit Worker(ThreadPool& owner) - : _owner(owner) - , _is_done(true) - , _thread(boost::bind(&Worker::loop, this)) - {} - - // destructor will block until current operation is completed - // Acts as a "join" on this thread - ~Worker(){ - _task.put(Task()); - _thread.join(); - } - - void set_task(Task& func){ - assert(!func.empty()); - assert(_is_done); - _is_done = false; - - _task.put(func); - } - - private: - ThreadPool& _owner; - MVar _task; - bool _is_done; // only used for error detection - boost::thread _thread; - - void loop(){ - while (true) { - Task task = _task.take(); - if (task.empty()) - break; // ends the thread - - try { - task(); - } catch (std::exception e){ - log() << "Unhandled exception in worker thread: " << e.what() << endl;; - } catch (...){ - log() << "Unhandled non-exception in worker thread" << endl; - } - _is_done = true; - _owner.task_done(this); - } - } -}; - -ThreadPool::ThreadPool(int nThreads) - : _tasksRemaining(0) - , _nThreads(nThreads) -{ - scoped_lock lock(_mutex); - while (nThreads-- > 0){ - Worker* worker = new Worker(*this); - _freeWorkers.push_front(worker); - } -} - -ThreadPool::~ThreadPool(){ - join(); - - assert(_tasks.empty()); - - // O(n) but n should be small - assert(_freeWorkers.size() == (unsigned)_nThreads); - - while(!_freeWorkers.empty()){ - delete _freeWorkers.front(); - _freeWorkers.pop_front(); - } -} - -void ThreadPool::join(){ - scoped_lock lock(_mutex); - while(_tasksRemaining){ - _condition.wait(lock.boost()); - } -} - -void ThreadPool::schedule(Task task){ - scoped_lock lock(_mutex); - - _tasksRemaining++; - - if (!_freeWorkers.empty()){ - _freeWorkers.front()->set_task(task); - _freeWorkers.pop_front(); - }else{ - _tasks.push_back(task); - } -} - -// should only be called by a worker from the worker thread -void ThreadPool::task_done(Worker* worker){ - scoped_lock lock(_mutex); - - if (!_tasks.empty()){ - worker->set_task(_tasks.front()); - _tasks.pop_front(); - }else{ - _freeWorkers.push_front(worker); - } - - _tasksRemaining--; - - if(_tasksRemaining == 0) - _condition.notify_all(); -} - -} //namespace threadpool -} //namespace mongo diff -Nru mongodb-1.4.4/util/thread_pool.h mongodb-1.6.3/util/thread_pool.h --- mongodb-1.4.4/util/thread_pool.h 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/thread_pool.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,82 +0,0 @@ -// thread_pool.h - -/* Copyright 2009 10gen Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
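The ThreadPool implementation removed above (its header is removed just below) exposed schedule() with boost::bind-style helpers and a draining join(); a hypothetical usage sketch against that interface as it existed in 1.4.4:

#include <iostream>
#include "util/thread_pool.h"

static void work(int n) { std::cout << "task " << n << '\n'; }

void threadPoolExample() {
    mongo::ThreadPool pool(4);      // four Worker threads
    for (int i = 0; i < 16; i++)
        pool.schedule(work, i);     // schedule(F, A) wraps boost::bind(f, a)
    pool.join();                    // blocks until tasks_remaining() == 0
}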
- */ - -#include -#include -#undef assert -#define assert xassert - -namespace mongo { - -namespace threadpool { - class Worker; - - typedef boost::function Task; //nullary function or functor - - // exported to the mongo namespace - class ThreadPool : boost::noncopyable{ - public: - explicit ThreadPool(int nThreads=8); - - // blocks until all tasks are complete (tasks_remaining() == 0) - // You should not call schedule while in the destructor - ~ThreadPool(); - - // blocks until all tasks are complete (tasks_remaining() == 0) - // does not prevent new tasks from being scheduled so could wait forever. - // Also, new tasks could be scheduled after this returns. - void join(); - - - // task will be copied a few times so make sure it's relatively cheap - void schedule(Task task); - - // Helpers that wrap schedule and boost::bind. - // Functor and args will be copied a few times so make sure it's relatively cheap - template - void schedule(F f, A a){ schedule(boost::bind(f,a)); } - template - void schedule(F f, A a, B b){ schedule(boost::bind(f,a,b)); } - template - void schedule(F f, A a, B b, C c){ schedule(boost::bind(f,a,b,c)); } - template - void schedule(F f, A a, B b, C c, D d){ schedule(boost::bind(f,a,b,c,d)); } - template - void schedule(F f, A a, B b, C c, D d, E e){ schedule(boost::bind(f,a,b,c,d,e)); } - - - int tasks_remaining() { return _tasksRemaining; } - - private: - mongo::mutex _mutex; - boost::condition _condition; - - list _freeWorkers; //used as LIFO stack (always front) - list _tasks; //used as FIFO queue (push_back, pop_front) - int _tasksRemaining; // in queue + currently processing - int _nThreads; // only used for sanity checking. could be removed in the future. - - // should only be called by a worker from the worker's thread - void task_done(Worker* worker); - friend class Worker; - }; - -} //namespace threadpool - -using threadpool::ThreadPool; - -} //namespace mongo diff -Nru mongodb-1.4.4/util/util.cpp mongodb-1.6.3/util/util.cpp --- mongodb-1.4.4/util/util.cpp 2010-06-30 00:03:29.000000000 -0700 +++ mongodb-1.6.3/util/util.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -1,4 +1,4 @@ -// util.cpp +// @file util.cpp /* Copyright 2009 10gen Inc. * @@ -15,7 +15,7 @@ * limitations under the License. */ -#include "stdafx.h" +#include "pch.h" #include "goodies.h" #include "unittest.h" #include "file_allocator.h" @@ -23,18 +23,71 @@ namespace mongo { - vector *UnitTest::tests = 0; - bool UnitTest::running = false; + boost::thread_specific_ptr _threadName; + + void _setThreadName( const char * name ){ + static int N = 0; + if ( strcmp( name , "conn" ) == 0 ){ + stringstream ss; + ss << name << ++N; + _threadName.reset( new string( ss.str() ) ); + } + else { + _threadName.reset( new string(name) ); + } + } + +#if defined(_WIN32) +#define MS_VC_EXCEPTION 0x406D1388 +#pragma pack(push,8) + typedef struct tagTHREADNAME_INFO + { + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + DWORD dwThreadID; // Thread ID (-1=caller thread). + DWORD dwFlags; // Reserved for future use, must be zero. 
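+        // (This struct plus the RaiseException(MS_VC_EXCEPTION, ...) call
+        //  below is the documented MSVC pattern for naming a thread: an
+        //  attached debugger intercepts exception 0x406D1388 and reads the
+        //  name out of this structure.)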
+ } THREADNAME_INFO; +#pragma pack(pop) + + void setThreadName(const char *name) + { + _setThreadName( name ); + Sleep(10); + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = name; + info.dwThreadID = -1; + info.dwFlags = 0; + __try + { + RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + } + } +#else + void setThreadName(const char * name ) { + _setThreadName( name ); + } +#endif - Nullstream nullstream; + string getThreadName(){ + string * s = _threadName.get(); + if ( s ) + return *s; + return ""; + } - thread_specific_ptr Logstream::tsp; + vector *UnitTest::tests = 0; + bool UnitTest::running = false; const char *default_getcurns() { return ""; } const char * (*getcurns)() = default_getcurns; int logLevel = 0; - mongo::mutex Logstream::mutex; + int tlogLevel = 0; + mongo::mutex Logstream::mutex("Logstream"); int Logstream::doneSetup = Logstream::magicNumber(); bool goingAway = false; @@ -103,38 +156,26 @@ } printStackTrace(); } - + + /* note: can't use malloc herein - may be in signal handler. + logLockless() likely does not comply and should still be fixed todo + */ void rawOut( const string &s ) { if( s.empty() ) return; - char now[64]; - time_t_to_String(time(0), now); - now[20] = 0; -#if defined(_WIN32) - (std::cout << now << " " << s).flush(); -#else - write( STDOUT_FILENO, now, 20 ); - write( STDOUT_FILENO, " ", 1 ); - write( STDOUT_FILENO, s.c_str(), s.length() ); - fsync( STDOUT_FILENO ); -#endif - } - -#ifndef _SCONS - // only works in scons - const char * gitVersion(){ return ""; } - const char * sysInfo(){ return ""; } -#endif - void printGitVersion() { log() << "git version: " << gitVersion() << endl; } - void printSysInfo() { log() << "sys info: " << sysInfo() << endl; } - string mongodVersion() { - stringstream ss; - ss << "db version v" << versionString << ", pdfile version " << VERSION << "." << VERSION_MINOR; - return ss.str(); + char buf[64]; + time_t_to_String( time(0) , buf ); + /* truncate / don't show the year: */ + buf[19] = ' '; + buf[20] = 0; + + Logstream::logLockless(buf); + Logstream::logLockless(s); + Logstream::logLockless("\n"); } ostream& operator<<( ostream &s, const ThreadSafeString &o ){ - s << (string)o; + s << o.toString(); return s; } diff -Nru mongodb-1.4.4/util/version.cpp mongodb-1.6.3/util/version.cpp --- mongodb-1.4.4/util/version.cpp 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/version.cpp 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,97 @@ +#include "pch.h" + +#include +#include +#include +#include +#include + +#include "version.h" + +namespace mongo { + + // + // mongo processes version support + // + + const char versionString[] = "1.6.3"; + + string mongodVersion() { + stringstream ss; + ss << "db version v" << versionString << ", pdfile version " << VERSION << "." << VERSION_MINOR; + return ss.str(); + } + + // + // git version support + // + +#ifndef _SCONS + // only works in scons + const char * gitVersion(){ return "not-scons"; } +#endif + + void printGitVersion() { log() << "git version: " << gitVersion() << endl; } + + // + // sys info support + // + +#ifndef _SCONS +#if defined(_WIN32) + string sysInfo(){ + stringstream ss; + ss << "not-scons win"; + ss << " mscver:" << _MSC_FULL_VER << " built:" << __DATE__; + ss << " boostver:" << BOOST_VERSION; +#if( !defined(_MT) ) +#error _MT is not defined +#endif + ss << (sizeof(char *) == 8) ? 
" 64bit" : " 32bit"; + return ss.str(); + } +#else + string sysInfo(){ return ""; } +#endif +#endif + + void printSysInfo() { log() << "sys info: " << sysInfo() << endl; } + + // + // 32 bit systems warning + // + + void show_warnings(){ + // each message adds a leading but not a trailing newline + + bool warned = false; + { + const char * foo = strchr( versionString , '.' ) + 1; + int bar = atoi( foo ); + if ( ( 2 * ( bar / 2 ) ) != bar ) { + cout << "\n** NOTE: This is a development version (" << versionString << ") of MongoDB."; + cout << "\n** Not recommended for production." << endl; + warned = true; + } + } + + if ( sizeof(int*) == 4 ) { + cout << endl; + cout << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << endl; + cout << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations" << endl; + warned = true; + } + +#ifdef __linux__ + if (boost::filesystem::exists("/proc/vz") && !boost::filesystem::exists("/proc/bc")){ + cout << endl; + cout << "** WARNING: You are running in OpenVZ. This is known to be broken!!!" << endl; + warned = true; + } +#endif + + if (warned) + cout << endl; + } + +} diff -Nru mongodb-1.4.4/util/version.h mongodb-1.6.3/util/version.h --- mongodb-1.4.4/util/version.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/version.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,24 @@ +#ifndef UTIL_VERSION_HEADER +#define UTIL_VERSION_HEADER + +#include + +namespace mongo { + + using std::string; + + // mongo version + extern const char versionString[]; + string mongodVersion(); + + const char * gitVersion(); + void printGitVersion(); + + string sysInfo(); + void printSysInfo(); + + void show_warnings(); + +} // namespace mongo + +#endif // UTIL_VERSION_HEADER diff -Nru mongodb-1.4.4/util/winutil.h mongodb-1.6.3/util/winutil.h --- mongodb-1.4.4/util/winutil.h 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/util/winutil.h 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,44 @@ +// @file winutil.cpp : Windows related utility functions +// +// /** +// * Copyright (C) 2008 10gen Inc. +// * +// * This program is free software: you can redistribute it and/or modify +// * it under the terms of the GNU Affero General Public License, version 3, +// * as published by the Free Software Foundation. +// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU Affero General Public License for more details. +// * +// * You should have received a copy of the GNU Affero General Public License +// * along with this program. If not, see . +// */ +// +// #include "pch.h" + +#pragma once + +#if defined(_WIN32) +#include +#include "text.h" + +namespace mongo { + + inline string GetWinErrMsg(DWORD err) { + LPTSTR errMsg; + ::FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPTSTR)&errMsg, 0, NULL ); + std::string errMsgStr = toUtf8String( errMsg ); + ::LocalFree( errMsg ); + // FormatMessage() appends a newline to the end of error messages, we trim it because endl flushes the buffer. 
+ errMsgStr = errMsgStr.erase( errMsgStr.length() - 2 ); + std::ostringstream output; + output << errMsgStr << " (" << err << ")"; + + return output.str(); + } +} + +#endif + diff -Nru mongodb-1.4.4/valgrind.suppressions mongodb-1.6.3/valgrind.suppressions --- mongodb-1.4.4/valgrind.suppressions 1969-12-31 16:00:00.000000000 -0800 +++ mongodb-1.6.3/valgrind.suppressions 2010-09-24 10:02:42.000000000 -0700 @@ -0,0 +1,8 @@ +{ + Command_Static_Stuff + Memcheck:Leak + ... + fun:_ZNSsC1EPKcRKSaIcE + ... +} +
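For reference, the suppression above targets leak reports whose stack passes through _ZNSsC1EPKcRKSaIcE, the mangled name of the std::string constructor taking a C string and an allocator; the leading and trailing "..." frames let it match at any call depth. A namespace-scope string such as the hypothetical example below allocates through exactly that frame during static initialization, which is the kind of one-time "static stuff" the suppression is named for:

#include <string>

// Built once at static-initialization time via the suppressed constructor
// frame; Memcheck's leak check may otherwise flag its buffer.
static std::string commandName("exampleCommand");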
    " << *headers << "