How lambda is implemented in the compiler

In C++, the result of evaluating a lambda-expression is called a closure object. This article is not intended to introduce the usage of C++ lambdas (this is covered in detail in “TC++PL” and “C++ Primer,” or you can refer to my previous summary C++11 Syntactic Sugar #lambda Expressions), but rather to analyze how lambda-expressions are implemented in Clang from the perspective of LLVM-IR.

The C++ standard describes lambda as follows:

[ISO/IEC 14882:2014 §5.1.2.2] The evaluation of a lambda-expression results in a prvalue temporary (12.2). This temporary is called the closure object.
The type of the lambda-expression (which is also the type of the closure object) is a unique, unnamed non-union class type — called the closure type — whose properties are described below. This class type is neither an aggregate (8.5.1) nor a literal type (3.9).
A closure object behaves like a function object (20.9).

The standard mentions that the closure type is a unique, non-union class type. Let’s take a look at how Clang implements lambda:

// Minimal example: defines a lambda that captures two locals by reference and invokes it once.
int main(){
	int x=123;
	double y=456;
	// [&] captures x and y by reference; z is an ordinary by-value parameter of the call operator.
	// The body assigns through both captures and to its own local copy of z.
	auto example=[&](int z)mutable{x=567;y=789;z=666;};
	example(111);
}

The code above defines a lambda that captures an int and a double object by reference and takes an int parameter. Let’s look at its LLVM-IR code:

%class.anon = type { i32*, double* }

; Function Attrs: norecurse uwtable
define i32 @main() #4 {
  %1 = alloca i32, align 4
  %2 = alloca double, align 8
  %3 = alloca %class.anon, align 8
  store i32 123, i32* %1, align 4
  store double 4.560000e+02, double* %2, align 8
  %4 = getelementptr inbounds %class.anon, %class.anon* %3, i32 0, i32 0
  store i32* %1, i32** %4, align 8
  %5 = getelementptr inbounds %class.anon, %class.anon* %3, i32 0, i32 1
  store double* %2, double** %5, align 8
  call void @"_ZZ4mainEN3$_0clEi"(%class.anon* %3, i32 111)
  ret i32 0
}

; Function Attrs: inlinehint nounwind uwtable
define internal void @"_ZZ4mainEN3$_0clEi"(%class.anon*, i32) #5 align 2 {
  %3 = alloca %class.anon*, align 8
  %4 = alloca i32, align 4
  store %class.anon* %0, %class.anon** %3, align 8
  store i32 %1, i32* %4, align 4
  %5 = load %class.anon*, %class.anon** %3, align 8
  %6 = getelementptr inbounds %class.anon, %class.anon* %5, i32 0, i32 0
  %7 = load i32*, i32** %6, align 8
  store i32 567, i32* %7, align 4
  %8 = getelementptr inbounds %class.anon, %class.anon* %5, i32 0, i32 1
  %9 = load double*, double** %8, align 8
  store double 7.890000e+02, double* %9, align 8
  store i32 666, i32* %4, align 4
  ret void
}

Here are a few key parts to focus on:

%class.anon = type { i32*, double* }

%3 = alloca %class.anon, align 8
call void @"_ZZ4mainEN3$_0clEi"(%class.anon* %3, i32 111)

define internal void @"_ZZ4mainEN3$_0clEi"(%class.anon*, i32) #5 align 2

It can be seen that Clang implements a lambda as an object of an anonymous class type that overloads operator(): the captured variables are stored as data members of this class, and the arguments passed at the call site are received as parameters of operator().
Next, let’s look at a function object (a class that overloads operator()) that I manually wrote to achieve the same functionality as the lambda expression above:

// Hand-written function object mirroring the lambda: it holds references to an int and a
// double and assigns to them (and to its by-value argument) when called.
class A{
public:
	// Binds the two reference members to the caller's variables.
	A(int &a,double &b):x(a),y(b){}
	// Writes 567 and 789 through the stored references; the assignment to z only
	// changes the local copy of the argument.
	void operator()(int z){
		x=567;y=789;z=666;
	}
private:
	int &x;
	double &y;
};
// Driver for the hand-written function object: constructs A from two locals and calls it once.
int main(){
	int x=123;
	double y=456;
	// A stores references to x and y, so the call below modifies these locals.
	A example(x,y);
	example(111);
}

Its LLVM-IR code is:

%class.A = type { i32*, double* }

; Function Attrs: norecurse uwtable
define i32 @main() #4 {
  %1 = alloca i32, align 4
  %2 = alloca double, align 8
  %3 = alloca %class.A, align 8
  store i32 123, i32* %1, align 4
  store double 4.560000e+02, double* %2, align 8
  call void @_ZN1AC2ERiRd(%class.A* %3, i32* dereferenceable(4) %1, double* dereferenceable(8) %2)
  call void @_ZN1AclEi(%class.A* %3, i32 111)
  ret i32 0
}

; Function Attrs: nounwind uwtable
define linkonce_odr void @_ZN1AC2ERiRd(%class.A*, i32* dereferenceable(4), double* dereferenceable(8)) unnamed_addr #5 comdat align 2 {
  %4 = alloca %class.A*, align 8
  %5 = alloca i32*, align 8
  %6 = alloca double*, align 8
  store %class.A* %0, %class.A** %4, align 8
  store i32* %1, i32** %5, align 8
  store double* %2, double** %6, align 8
  %7 = load %class.A*, %class.A** %4, align 8
  %8 = getelementptr inbounds %class.A, %class.A* %7, i32 0, i32 0
  %9 = load i32*, i32** %5, align 8
  store i32* %9, i32** %8, align 8
  %10 = getelementptr inbounds %class.A, %class.A* %7, i32 0, i32 1
  %11 = load double*, double** %6, align 8
  store double* %11, double** %10, align 8
  ret void
}

; Function Attrs: nounwind uwtable
define linkonce_odr void @_ZN1AclEi(%class.A*, i32) #5 comdat align 2 {
  %3 = alloca %class.A*, align 8
  %4 = alloca i32, align 4
  store %class.A* %0, %class.A** %3, align 8
  store i32 %1, i32* %4, align 4
  %5 = load %class.A*, %class.A** %3, align 8
  %6 = getelementptr inbounds %class.A, %class.A* %5, i32 0, i32 0
  %7 = load i32*, i32** %6, align 8
  store i32 567, i32* %7, align 4
  %8 = getelementptr inbounds %class.A, %class.A* %5, i32 0, i32 1
  %9 = load double*, double** %8, align 8
  store double 7.890000e+02, double* %9, align 8
  store i32 666, i32* %4, align 4
  ret void
}

Now let’s compare the three key parts mentioned above:

# lambda
%class.anon = type { i32*, double* }
# function object
%class.A = type { i32*, double* }

# lambda
%3 = alloca %class.anon, align 8
call void @"_ZZ4mainEN3$_0clEi"(%class.anon* %3, i32 111)
# function object
%3 = alloca %class.A, align 8
call void @_ZN1AclEi(%class.A* %3, i32 111)

# lambda
define internal void @"_ZZ4mainEN3$_0clEi"(%class.anon*, i32) #5 align 2
# function object
define linkonce_odr void @_ZN1AclEi(%class.A*, i32) #5 comdat align 2

It can be seen that the hand-written function object is essentially the same as the closure type the compiler generates for the lambda expression…
The objects captured by the capture list are stored as data members of the class generated by the compiler, while the receiving parameters are obtained as parameters of the operator(). The main difference between the IR code for the lambda and my manually written one is that the lambda does not generate a corresponding constructor: the closure’s members are initialized inline in main. The two call operators also differ in linkage (internal for the lambda, linkonce_odr for A).
Thus, it can be concluded that in Clang, a lambda is implemented as a function object…

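However, this raises another question: since a lambda is a function object, can I access the this pointer within the lambda’s function body (the this of the lambda)?
The answer is no: inside a lambda body, this never refers to the closure object. It refers to the this of the enclosing member function, and even that is only available if the lambda captures it. Here’s an example: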

// Example that intentionally does NOT compile: the lambda in the constructor uses
// `this` without listing it in its capture list.
class A
{
public:
  A(){
    // Empty capture list: `this` is not captured, so the compiler rejects this line.
    auto callprint=[]{this->print();};
    callprint();
  }
  // Prints a fixed label to standard output.
  void print()
  {
    std::cout<<"A::print"<<std::endl;
  }
};

Attempting to compile code that uses this without capturing it in the lambda will result in the following compilation error:

error: 'this' cannot be implicitly captured in this context

The capture list of the lambda can capture this:

// Working version: the constructor's lambda captures `this` explicitly and calls
// print() through it.
class A
{
public:
  A(){
    // [this] captures the enclosing object's pointer, making this->print() valid in the body.
    auto callprint=[this]{this->print();};
    callprint();
  }
  // Prints a fixed label followed by the current value of ival.
  void print()
  {
    std::cout<<"A::print, ival is "<<ival<<std::endl;
  }
private:
  // Initialized to 123 by its default member initializer.
  int ival=123;
};

// output
// A::print, ival is 123

Furthermore, combining lambdas with the standard library’s <functional> facilities is a powerful technique!