DFT¶
cmul
function¶
template <typename T, size_t N1, size_t N2>
vec<T, const_max(N1, N2)> cmul(const vec<T, N1> &x,
const vec<T, N2> &y)
Complex Multiplication
Source code
template <typename T, size_t N1, size_t N2>
KFR_INTRINSIC vec<T, const_max(N1, N2)> cmul(const vec<T, N1>& x, const vec<T, N2>& y)
{
return intrinsics::cmul_impl(x, y);
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/impl/ft.hpp#L74
dct_plan
dct_plan
class¶
template <typename T> dct_plan
DCT type 2 (unscaled)
Source code
template <typename T>
struct dct_plan : dft_plan<T>
{
dct_plan(size_t size) : dft_plan<T>(size) { this->temp_size += sizeof(complex<T>) * size * 2; }
#ifdef KFR_DFT_MULTI
dct_plan(cpu_t cpu, size_t size) : dft_plan<T>(cpu, size)
{
this->temp_size += sizeof(complex<T>) * size * 2;
}
#endif
KFR_MEM_INTRINSIC void execute(T* out, const T* in, u8* temp, bool inverse = false) const
{
const size_t size = this->size;
const size_t halfSize = size / 2;
univector_ref<complex<T>> mirrored = make_univector(
ptr_cast<complex<T>>(temp + this->temp_size - sizeof(complex<T>) * size * 2), size);
univector_ref<complex<T>> mirrored_dft =
make_univector(ptr_cast<complex<T>>(temp + this->temp_size - sizeof(complex<T>) * size), size);
auto t = counter() * c_pi<T> / (size * 2);
if (!inverse)
{
for (size_t i = 0; i < halfSize; i++)
{
mirrored[i] = in[i * 2];
mirrored[size - 1 - i] = in[i * 2 + 1];
}
if (size % 2)
{
mirrored[halfSize] = in[size - 1];
}
dft_plan<T>::execute(mirrored_dft.data(), mirrored.data(), temp, cfalse);
make_univector(out, size) = real(mirrored_dft) * cos(t) + imag(mirrored_dft) * sin(t);
}
else
{
mirrored = make_complex(make_univector(in, size) * cos(t), make_univector(in, size) * -sin(t));
dft_plan<T>::execute(mirrored_dft.data(), mirrored.data(), temp, cfalse);
for (size_t i = 0; i < halfSize; i++)
{
out[i * 2 + 0] = mirrored_dft[i].real();
out[i * 2 + 1] = mirrored_dft[size - 1 - i].real();
}
if (size % 2)
{
out[size - 1] = mirrored_dft[halfSize].real();
}
}
}
template <univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
KFR_MEM_INTRINSIC void execute(univector<T, Tag1>& out, const univector<T, Tag2>& in,
univector<u8, Tag3>& temp, bool inverse = false) const
{
execute(out.data(), in.data(), temp.data(), inverse);
}
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L403
dft
function¶
template <typename T, univector_tag Tag>
univector<complex<T>>
dft(const univector<complex<T>, Tag> &input)
Performs Direct DFT using cached plan
Source code
template <typename T, univector_tag Tag>
univector<complex<T>> dft(const univector<complex<T>, Tag>& input)
{
dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size(), std::numeric_limits<T>::quiet_NaN());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
return output;
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L130
dft_plan
dft_plan
class¶
template <typename T> dft_plan
Class for performing DFT/FFT
Source code
template <typename T>
struct dft_plan
https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L119
dft_plan
template <typename T> dft_plan
Class for performing DFT/FFT
Source code
template <typename T>
struct dft_plan
{
size_t size;
size_t temp_size;
#ifdef KFR_DFT_MULTI
explicit dft_plan(cpu_t cpu, size_t size, dft_order order = dft_order::normal)
: size(size), temp_size(0), data_size(0)
{
if (cpu == cpu_t::runtime)
cpu = get_cpu();
switch (cpu)
{
case cpu_t::avx512:
CMT_IF_ENABLED_AVX512(avx512::dft_initialize(*this); break;)
case cpu_t::avx2:
CMT_IF_ENABLED_AVX2(avx2::dft_initialize(*this); break;)
case cpu_t::avx:
CMT_IF_ENABLED_AVX(avx::dft_initialize(*this); break;)
case cpu_t::sse42:
case cpu_t::sse41:
CMT_IF_ENABLED_SSE41(sse41::dft_initialize(*this); break;)
case cpu_t::ssse3:
CMT_IF_ENABLED_SSSE3(ssse3::dft_initialize(*this); break;)
case cpu_t::sse3:
CMT_IF_ENABLED_SSE3(sse3::dft_initialize(*this); break;)
default:
CMT_IF_ENABLED_SSE2(sse2::dft_initialize(*this); break;);
}
}
explicit dft_plan(size_t size, dft_order order = dft_order::normal)
:dft_plan(cpu_t::runtime, size, order)
{
}
#else
explicit dft_plan(size_t size, dft_order order = dft_order::normal)
: size(size), temp_size(0), data_size(0)
{
dft_initialize(*this);
}
#endif
void dump() const
{
for (const std::unique_ptr<dft_stage<T>>& s : stages)
{
s->dump();
}
}
KFR_MEM_INTRINSIC void execute(complex<T>* out, const complex<T>* in, u8* temp,
bool inverse = false) const
{
if (inverse)
execute_dft(ctrue, out, in, temp);
else
execute_dft(cfalse, out, in, temp);
}
~dft_plan() {}
template <bool inverse>
KFR_MEM_INTRINSIC void execute(complex<T>* out, const complex<T>* in, u8* temp,
cbool_t<inverse> inv) const
{
execute_dft(inv, out, in, temp);
}
template <univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
KFR_MEM_INTRINSIC void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
univector<u8, Tag3>& temp, bool inverse = false) const
{
if (inverse)
execute_dft(ctrue, out.data(), in.data(), temp.data());
else
execute_dft(cfalse, out.data(), in.data(), temp.data());
}
template <bool inverse, univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
KFR_MEM_INTRINSIC void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
univector<u8, Tag3>& temp, cbool_t<inverse> inv) const
{
execute_dft(inv, out.data(), in.data(), temp.data());
}
autofree<u8> data;
size_t data_size;
std::vector<dft_stage_ptr<T>> stages;
protected:
struct noinit
{
};
explicit dft_plan(noinit, size_t size, dft_order order = dft_order::normal)
: size(size), temp_size(0), data_size(0)
{
}
const complex<T>* select_in(size_t stage, const complex<T>* out, const complex<T>* in,
const complex<T>* scratch, bool in_scratch) const
{
if (stage == 0)
return in_scratch ? scratch : in;
return stages[stage - 1]->to_scratch ? scratch : out;
}
complex<T>* select_out(size_t stage, complex<T>* out, complex<T>* scratch) const
{
return stages[stage]->to_scratch ? scratch : out;
}
template <bool inverse>
void execute_dft(cbool_t<inverse>, complex<T>* out, const complex<T>* in, u8* temp) const
{
if (stages.size() == 1 && (stages[0]->can_inplace || in != out))
{
return stages[0]->execute(cbool<inverse>, out, in, temp);
}
size_t stack[32] = { 0 };
complex<T>* scratch = ptr_cast<complex<T>>(
temp + this->temp_size -
align_up(sizeof(complex<T>) * this->size, platform<>::native_cache_alignment));
bool in_scratch = !stages[0]->can_inplace && in == out;
if (in_scratch)
{
builtin_memcpy(scratch, in, sizeof(complex<T>) * this->size);
}
const size_t count = stages.size();
for (size_t depth = 0; depth < count;)
{
if (stages[depth]->recursion)
{
size_t offset = 0;
size_t rdepth = depth;
size_t maxdepth = depth;
do
{
if (stack[rdepth] == stages[rdepth]->repeats)
{
stack[rdepth] = 0;
rdepth--;
}
else
{
complex<T>* rout = select_out(rdepth, out, scratch);
const complex<T>* rin = select_in(rdepth, out, in, scratch, in_scratch);
stages[rdepth]->execute(cbool<inverse>, rout + offset, rin + offset, temp);
offset += stages[rdepth]->out_offset;
stack[rdepth]++;
if (rdepth < count - 1 && stages[rdepth + 1]->recursion)
rdepth++;
else
maxdepth = rdepth;
}
} while (rdepth != depth);
depth = maxdepth + 1;
}
else
{
size_t offset = 0;
while (offset < this->size)
{
stages[depth]->execute(cbool<inverse>, select_out(depth, out, scratch) + offset,
select_in(depth, out, in, scratch, in_scratch) + offset, temp);
offset += stages[depth]->stage_size;
}
depth++;
}
}
}
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L135
fft_inverse
fft_inverse
class¶
template <typename E> fft_inverse
[0, N - 1, N - 2, N - 3, ..., 3, 2, 1]
Source code
template <typename E>
struct fft_inverse : internal::expression_with_arguments<E>
{
using value_type = value_type_of<E>;
KFR_MEM_INTRINSIC fft_inverse(E&& expr) CMT_NOEXCEPT
: internal::expression_with_arguments<E>(std::forward<E>(expr))
{
}
friend KFR_INTRINSIC vec<value_type, 1> get_elements(const fft_inverse& self, cinput_t input,
size_t index, vec_shape<value_type, 1>)
{
return self.argument_first(input, index == 0 ? 0 : self.size() - index, vec_shape<value_type, 1>());
}
template <size_t N>
friend KFR_MEM_INTRINSIC vec<value_type, N> get_elements(const fft_inverse& self, cinput_t input,
size_t index, vec_shape<value_type, N>)
{
if (index == 0)
{
return concat(
self.argument_first(input, index, vec_shape<value_type, 1>()),
reverse(self.argument_first(input, self.size() - (N - 1), vec_shape<value_type, N - 1>())));
}
return reverse(self.argument_first(input, self.size() - index - (N - 1), vec_shape<value_type, N>()));
}
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/impl/dft-impl.hpp#L161
idft
function¶
template <typename T, univector_tag Tag>
univector<complex<T>>
idft(const univector<complex<T>, Tag> &input)
Performs Inverse DFT using cached plan
Source code
template <typename T, univector_tag Tag>
univector<complex<T>> idft(const univector<complex<T>, Tag>& input)
{
dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size(), std::numeric_limits<T>::quiet_NaN());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp, ctrue);
return output;
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L141
irealdft
function¶
template <typename T, univector_tag Tag>
univector<T>
irealdft(const univector<complex<T>, Tag> &input)
Permorms Real Inverse DFT using cached plan
Source code
template <typename T, univector_tag Tag>
univector<T> irealdft(const univector<complex<T>, Tag>& input)
{
dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), (input.size() - 1) * 2);
univector<T> output((input.size() - 1) * 2, std::numeric_limits<T>::quiet_NaN());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
return output;
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L163
realdft
function¶
template <typename T, univector_tag Tag>
univector<complex<T>>
realdft(const univector<T, Tag> &input)
Performs Real Direct DFT using cached plan
Source code
template <typename T, univector_tag Tag>
univector<complex<T>> realdft(const univector<T, Tag>& input)
{
dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), input.size());
univector<complex<T>> output(input.size() / 2 + 1, std::numeric_limits<T>::quiet_NaN());
univector<u8> temp(dft->temp_size);
dft->execute(output, input, temp);
return output;
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L152
reference_dft
function¶
template <typename Tnumber = double, typename T>
void reference_dft(complex<T> *out, const complex<T> *in,
size_t size, bool inversion = false)
Performs Complex DFT using reference implementation (slow, used for testing)
Source code
template <typename Tnumber = double, typename T>
void reference_dft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
{
using std::cos;
using std::sin;
if (is_poweroftwo(size))
{
return reference_fft<Tnumber>(out, in, size, inversion);
}
constexpr Tnumber pi2 = c_pi<Tnumber, 2>;
if (size < 2)
return;
std::vector<complex<T>> datain;
if (out == in)
{
datain.resize(size);
std::copy_n(in, size, datain.begin());
in = datain.data();
}
{
Tnumber sumr = 0;
Tnumber sumi = 0;
for (size_t j = 0; j < size; j++)
{
sumr += static_cast<Tnumber>(in[j].real());
sumi += static_cast<Tnumber>(in[j].imag());
}
out[0] = { static_cast<T>(sumr), static_cast<T>(sumi) };
}
for (size_t i = 1; i < size; i++)
{
Tnumber sumr = static_cast<Tnumber>(in[0].real());
Tnumber sumi = static_cast<Tnumber>(in[0].imag());
for (size_t j = 1; j < size; j++)
{
const Tnumber x = pi2 * ((i * j) % size) / size;
Tnumber twr = cos(x);
Tnumber twi = sin(x);
if (inversion)
twi = -twi;
sumr += twr * static_cast<Tnumber>(in[j].real()) + twi * static_cast<Tnumber>(in[j].imag());
sumi += twr * static_cast<Tnumber>(in[j].imag()) - twi * static_cast<Tnumber>(in[j].real());
out[i] = { static_cast<T>(sumr), static_cast<T>(sumi) };
}
}
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L138
template <typename Tnumber = double, typename T>
inline univector<complex<T>>
reference_dft(const univector<complex<T>> &in,
bool inversion = false)
Performs DFT using reference implementation (slow, used for testing)
Source code
template <typename Tnumber = double, typename T>
inline univector<complex<T>> reference_dft(const univector<complex<T>>& in, bool inversion = false)
{
univector<complex<T>> out(in.size());
reference_dft(&out[0], &in[0], in.size(), inversion);
return out;
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L188
reference_fft
function¶
template <typename Tnumber = double, typename T>
void reference_fft(complex<T> *out, const complex<T> *in,
size_t size, bool inversion = false)
Performs Complex FFT using reference implementation (slow, used for testing)
Source code
template <typename Tnumber = double, typename T>
void reference_fft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
{
using Tcmplx = Tnumber(*)[2];
if (size < 2)
return;
std::vector<complex<Tnumber>> datain(size);
std::vector<complex<Tnumber>> dataout(size);
std::vector<complex<Tnumber>> temp(size);
std::copy(in, in + size, datain.begin());
const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
Tcmplx(dataout.data()), Tcmplx(temp.data()));
std::copy(dataout.begin(), dataout.end(), out);
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L84
template <typename Tnumber = double, typename T>
void reference_fft(complex<T> *out, const T *in,
size_t size)
Performs Direct Real FFT using reference implementation (slow, used for testing)
Source code
template <typename Tnumber = double, typename T>
void reference_fft(complex<T>* out, const T* in, size_t size)
{
constexpr bool inversion = false;
using Tcmplx = Tnumber(*)[2];
if (size < 2)
return;
std::vector<complex<Tnumber>> datain(size);
std::vector<complex<Tnumber>> dataout(size);
std::vector<complex<Tnumber>> temp(size);
std::copy(in, in + size, datain.begin());
const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
Tcmplx(dataout.data()), Tcmplx(temp.data()));
std::copy(dataout.begin(), dataout.end(), out);
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L101
template <typename Tnumber = double, typename T>
void reference_fft(T *out, const complex<T> *in,
size_t size)
Performs Inverse Real FFT using reference implementation (slow, used for testing)
Source code
template <typename Tnumber = double, typename T>
void reference_fft(T* out, const complex<T>* in, size_t size)
{
constexpr bool inversion = true;
using Tcmplx = Tnumber(*)[2];
if (size < 2)
return;
std::vector<complex<Tnumber>> datain(size);
std::vector<complex<Tnumber>> dataout(size);
std::vector<complex<Tnumber>> temp(size);
std::copy(in, in + size, datain.begin());
const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
Tcmplx(dataout.data()), Tcmplx(temp.data()));
for (size_t i = 0; i < size; i++)
out[i] = dataout[i].real();
}
https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L119
Auto-generated from sources, Revision , https://github.com/kfrlib/kfr/blob//include/kfr/