Improved Cooley-Tukey

author: Roland Stigge <stigge@antcom.de> 2019-01-26 23:19:38 +0100
committer: Roland Stigge <stigge@antcom.de> 2019-01-26 23:19:38 +0100
commit: db0d20a6c322211593fecf209898f2435468e813 (patch)
tree: 5cf1bbe9f9112cb1e6e5a3b31e2222aa3ba3b638
parent: 9e238820cb4df45219cf122206aa0ae153e81c60 (diff)
2 files changed, 230 insertions, 28 deletions
diff --git a/Makefile b/Makefile
index 7b3a78f..8a88b0c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,16 @@
-CXX=clang++-7
+CXX=clang++-7 -stdlib=libc++
+CXXFLAGS=-stdlib=libc++ -Wall -O2 -std=c++17
+#-march=native -mtune=native # is not better for llvm
+
+# libstdc++-8 doesn't have transform_reduce
+#CXX=g++-8
+#CXXFLAGS=-Wall -O2 -std=c++17 -nostdinc++ -I/usr/lib/llvm-7/include/c++/v1 -nodefaultlibs -lc++ -lc++abi -lm -lc -lgcc_s -lgcc
+# -march=native -mtune=native # doesn't help for gcc
 
 all: fft
 
 fft: fft.cpp
-	$(CXX) -Wall -O2 -std=c++17 -o $@ $^
+	$(CXX) $(CXXFLAGS) -o $@ $^
 
 install:
 
diff --git a/fft.cpp b/fft.cpp
index 8613dd9..e7d5de6 100644
--- a/fft.cpp
+++ b/fft.cpp
@@ -1,15 +1,51 @@
+// FFT
+
+#include <complex>
+#include <chrono>
+#include <cmath>
+#include <ctime>
+#include <exception>
+#include <functional>
 #include <iostream>
+#include <memory>
+#include <numeric>
 #include <string>
 #include <vector>
-#include <complex>
-#include <cmath>
-#include <chrono>
 
 const double PI = std::acos(-1);
 const std::complex<double> i(0, 1);
 
+const double tolerance = 0.000001;
+
 using namespace std::complex_literals;
 
+/// CPU time clock
+class Timer {
+public:
+	Timer(): mStart(std::clock()){}
+	void start(){mStart = std::clock();}
+	double elapsed() { return (double(std::clock()) - mStart) / CLOCKS_PER_SEC; }
+	static double precision() {
+		static double result{0};
+		
+		if (result != 0)
+			return result;
+		
+		std::clock_t start = std::clock();
+		while (start == std::clock()) {}
+		start = std::clock();
+
+		std::clock_t end = start;
+		while (end == std::clock()) {}
+		end = std::clock();
+
+		result = (double(end) - start) / CLOCKS_PER_SEC;
+		return result;
+	}
+private:
+	std::clock_t mStart;
+};
+
 // (Slow) DFT
 std::vector<std::complex<double>> dft(const std::vector<std::complex<double>> &v) {
 	std::vector<std::complex<double>> result(v.size(), 0);
@@ -36,19 +72,19 @@ void generate(std::vector<std::complex<double>>& v) {
     }
 }
 
+namespace CooleyTukey {
 
 // separate even/odd elements to lower/upper halves of array respectively.
 // Due to Butterfly combinations, this turns out to be the simplest way 
 // to get the job done without clobbering the wrong elements.
-void separate (std::complex<double>* a, int n) {
-    std::complex<double>* b = new std::complex<double>[n/2];  // get temp heap storage
-    for(int i=0; i<n/2; i++)    // copy all odd elements to heap storage
-        b[i] = a[i*2+1];
-    for(int i=0; i<n/2; i++)    // copy all even elements to lower-half of a[]
-        a[i] = a[i*2];
-    for(int i=0; i<n/2; i++)    // copy all odd (from heap) to upper-half of a[]
-        a[i+n/2] = b[i];
-    delete[] b;                 // delete heap storage
+void separate(std::vector<std::complex<double>>::iterator a, int n) {
+	std::vector<std::complex<double>> b(n/2);
+	for(int i=0; i<n/2; i++)    // copy all odd elements to heap storage
+		b[i] = a[i*2+1];
+	for(int i=0; i<n/2; i++)    // copy all even elements to lower-half of a[]
+		a[i] = a[i*2];
+	for(int i=0; i<n/2; i++)    // copy all odd (from heap) to upper-half of a[]
+		a[i+n/2] = b[i];
 }
 
 // N must be a power-of-2, or bad things will happen.
@@ -58,14 +94,14 @@ void separate (std::complex<double>* a, int n) {
 // Because of Nyquist theorem, N samples means 
 // only first N/2 FFT results in X[] are the answer.
 // (upper half of X[] is a reflection with no new information).
-void fft2 (std::complex<double>* X, int N) {
+void fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
     if(N < 2) {
         // bottom of recursion.
         // Do nothing here, because already X[0] = x[0]
     } else {
         separate(X,N);      // all evens to lower half, all odds to upper half
-        fft2(X,     N/2);   // recurse even items
-        fft2(X+N/2, N/2);   // recurse odd  items
+        fft_recursive(X,     N/2);   // recurse even items
+        fft_recursive(X+N/2, N/2);   // recurse odd  items
         // combine results of two half recursions
         for(int k=0; k<N/2; k++) {
             std::complex<double> e = X[k    ];   // even
@@ -79,27 +115,186 @@ void fft2 (std::complex<double>* X, int N) {
 }
 
 // Cooley-Tukey
-// TODO: Use fft2 via iterators!
-std::vector<std::complex<double>> fft1(const std::vector<std::complex<double>> &v) {
-	std::vector<std::complex<double>> result(v.size(), 0);
+std::vector<std::complex<double>> fft(const std::vector<std::complex<double>> &v) {
+	std::vector<std::complex<double>> result(v);
+
+	fft_recursive(std::begin(result), result.size());
 	
 	return result;
 }
+}; // namespace CooleyTukey
 
-int main(int argc, char* argv[]) {
-	std::vector<std::complex<double>> v(1024, 0);
+// FFT Roland Reichwein
+class RR {
+private:
+	int mSize;
+	std::vector<int> order;
+	std::vector<std::complex<double>> expLUT;
 
-	generate(v);
+public:
+	RR(int size): mSize(size), order(size), expLUT(size/2) {
+		// reorder LUT
+		for (int i = 0; i < size; ++i) {
+			order[i] = bitreverse(i);
+		}
+
+		// exp LUT
+		for (int i = 0; i < size / 2; ++i) {
+			expLUT[i] = exp(std::complex<double>(0,-2.*M_PI*i/size));
+		}
+	}
+
+	std::vector<std::complex<double>> fft(const std::vector<std::complex<double>> &v) {
+		if (v.size() != mSize)
+			throw std::length_error("Bad input size");
+
+		std::vector<std::complex<double>> result;
+
+		reorder(v, result);
+		fft_recursive(std::begin(result), mSize);
+		
+		return result;
+	}
 
-	std::vector<std::complex<double>> v_dft = dft(v);
+// TODO: option for only half FFT due to real redundancy
 
-	std::vector<std::complex<double>> v_fft = fft1(v);
+private:
+	int bitreverse(int i) {
+		int size{mSize};
+		int result{0};
+
+		while (size > 1) {
+			result <<= 1;
+			result |= i & 1;
+			i >>= 1;
+			size >>= 1;
+		}
+
+		return result;
+	}
+
+	void reorder(const std::vector<std::complex<double>>& src, std::vector<std::complex<double>>& dst) {
+		dst.resize(src.size());
+		for (int i = 0; i < mSize; ++i) {
+			dst[order[i]] = src[i];
+		}
+	}
 
-	if (v_dft != v_fft) {
-		std::cout << "Error: Results differ!\n";
-		return 1;
+	// N must be a power-of-2, or bad things will happen.
+	// Currently no check for this condition.
+	//
+	// N input samples in X[] are FFT'd and results left in X[].
+	// Because of Nyquist theorem, N samples means 
+	// only first N/2 FFT results in X[] are the answer.
+	// (upper half of X[] is a reflection with no new information).
+	void fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
+		if(N > 2) {
+			fft_recursive(X,     N/2);   // recurse even items
+			fft_recursive(X+N/2, N/2);   // recurse odd  items
+		}
+		// combine results of two half recursions
+		for(int k=0; k<N/2; k++) {
+			std::complex<double> e = X[k    ];   // even
+			std::complex<double> o = X[k+N/2];   // odd
+			// w is the "twiddle-factor"
+			std::complex<double> w = expLUT[k * mSize / N];
+			X[k    ] = e + w * o;
+			X[k+N/2] = e - w * o;
+		}
 	}
+
+}; // class RR
+
+class Measure {
+	Timer mTimer;
+
+public:
+	using Data = std::vector<std::complex<double>>;
+	
+protected:
+	Measure* mReference; // Reference measurement: we should calculate similar to this one and be faster than this one
 	
+	const Data& mIn;
+	Data mResult;
+
+	std::string mName;
+	double mElapsed;
+
+public:
+	Measure(const Data& in): mReference(nullptr), mIn(in), mResult(in.size()) {}
+
+	virtual void run_impl() = 0; // implemented by subclasses
+
+	void run() {
+		mTimer.start();
+		run_impl();
+		mElapsed = mTimer.elapsed();
+		std::cout << mName << ": " << mElapsed << "s\n";
+
+		if (mReference) {
+			if (mResult != mReference->mResult) {
+				double diff = std::transform_reduce(std::begin(mReference->mResult), std::end(mReference->mResult), std::begin(mResult), double(0.0),
+								    [](const double& x, const double& y) -> double
+								    { return x + y;},
+								    [](const std::complex<double>& x, const std::complex<double>&y) -> double
+								    { return abs(y - x);}
+								    );
+				if (diff > tolerance)
+					std::cout << "Error: Results diff: " << diff << "\n";
+			}
+
+			if (mElapsed > mReference->mElapsed) {
+				std::cout << "Error: " << mName << " too slow!\n";
+			}
+		}
+	}
+
+	double elapsed() { return mElapsed; }
+
+	Data& result() { return mResult; }
+};
+
+class MeasureDFT: public Measure {
+public:
+	MeasureDFT(const Data& in): Measure(in){ mName = "DFT";}
+	void run_impl() override {
+		mResult = dft(mIn);
+	}
+};
+
+class MeasureFFT: public Measure {
+public:
+	MeasureFFT(const Data& in, Measure& reference): Measure(in) { mName = "FFT Cooley-Tukey"; mReference = &reference;}
+	void run_impl() override {
+		mResult = CooleyTukey::fft(mIn);
+	}
+};
+
+class MeasureFFT_RR: public Measure {
+	RR mRR;
+public:
+	MeasureFFT_RR(const Data& in, Measure& reference): Measure(in), mRR(in.size()){ mName = "FFT RR"; mReference = &reference;}
+	void run_impl() override {
+		mResult = mRR.fft(mIn);
+	}
+};
+
+int main(int argc, char* argv[]) {
+	std::vector<std::complex<double>> v(4096, 0);
+
+	generate(v);
+
+	std::cout << "Timer precision: " << Timer::precision() << "s\n";
+
+	MeasureDFT measureDFT(v);
+	measureDFT.run();
+
+	MeasureFFT measureFFT(v, measureDFT);
+	measureFFT.run();
+
+	MeasureFFT_RR measureFFT_RR(v, measureFFT);
+	measureFFT_RR.run();
+
 	return 0;
 }
author	Roland Stigge <stigge@antcom.de>	2019-01-26 23:19:38 +0100
committer	Roland Stigge <stigge@antcom.de>	2019-01-26 23:19:38 +0100
commit	db0d20a6c322211593fecf209898f2435468e813 (patch)
tree	5cf1bbe9f9112cb1e6e5a3b31e2222aa3ba3b638
parent	9e238820cb4df45219cf122206aa0ae153e81c60 (diff)