Skip to content

DFT

cmul function

template <typename T, size_t N1, size_t N2>
vec<T, const_max(N1, N2)> cmul(const vec<T, N1> &x,
                               const vec<T, N2> &y)

Complex Multiplication

Source code
template <typename T, size_t N1, size_t N2>
KFR_INTRINSIC vec<T, const_max(N1, N2)> cmul(const vec<T, N1>& x, const vec<T, N2>& y)
{
    return intrinsics::cmul_impl(x, y);
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/impl/ft.hpp#L74

dct_plan

dct_plan class

template <typename T> dct_plan

DCT type 2 (unscaled)

Source code
template <typename T>
struct dct_plan : dft_plan<T>
{
    dct_plan(size_t size) : dft_plan<T>(size) { this->temp_size += sizeof(complex<T>) * size * 2; }

#ifdef KFR_DFT_MULTI
    dct_plan(cpu_t cpu, size_t size) : dft_plan<T>(cpu, size)
    {
        this->temp_size += sizeof(complex<T>) * size * 2;
    }
#endif

    KFR_MEM_INTRINSIC void execute(T* out, const T* in, u8* temp, bool inverse = false) const
    {
        const size_t size                  = this->size;
        const size_t halfSize              = size / 2;
        univector_ref<complex<T>> mirrored = make_univector(
            ptr_cast<complex<T>>(temp + this->temp_size - sizeof(complex<T>) * size * 2), size);
        univector_ref<complex<T>> mirrored_dft =
            make_univector(ptr_cast<complex<T>>(temp + this->temp_size - sizeof(complex<T>) * size), size);
        auto t = counter() * c_pi<T> / (size * 2);
        if (!inverse)
        {
            for (size_t i = 0; i < halfSize; i++)
            {
                mirrored[i]            = in[i * 2];
                mirrored[size - 1 - i] = in[i * 2 + 1];
            }
            if (size % 2)
            {
                mirrored[halfSize] = in[size - 1];
            }
            dft_plan<T>::execute(mirrored_dft.data(), mirrored.data(), temp, cfalse);
            make_univector(out, size) = real(mirrored_dft) * cos(t) + imag(mirrored_dft) * sin(t);
        }
        else
        {
            mirrored = make_complex(make_univector(in, size) * cos(t), make_univector(in, size) * -sin(t));
            dft_plan<T>::execute(mirrored_dft.data(), mirrored.data(), temp, cfalse);
            for (size_t i = 0; i < halfSize; i++)
            {
                out[i * 2 + 0] = mirrored_dft[i].real();
                out[i * 2 + 1] = mirrored_dft[size - 1 - i].real();
            }
            if (size % 2)
            {
                out[size - 1] = mirrored_dft[halfSize].real();
            }
        }
    }

    template <univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
    KFR_MEM_INTRINSIC void execute(univector<T, Tag1>& out, const univector<T, Tag2>& in,
                                   univector<u8, Tag3>& temp, bool inverse = false) const
    {
        execute(out.data(), in.data(), temp.data(), inverse);
    }
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L403

dft function

template <typename T, univector_tag Tag>
univector<complex<T>>
dft(const univector<complex<T>, Tag> &input)

Performs Direct DFT using cached plan

Source code
template <typename T, univector_tag Tag>
univector<complex<T>> dft(const univector<complex<T>, Tag>& input)
{
    dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
    univector<complex<T>> output(input.size(), std::numeric_limits<T>::quiet_NaN());
    univector<u8> temp(dft->temp_size);
    dft->execute(output, input, temp);
    return output;
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L130

dft_plan

dft_plan class

template <typename T> dft_plan

Class for performing DFT/FFT

Source code
template <typename T>
struct dft_plan

https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L119

dft_plan

template <typename T> dft_plan

Class for performing DFT/FFT

Source code
template <typename T>
struct dft_plan
{
    size_t size;
    size_t temp_size;

#ifdef KFR_DFT_MULTI
    explicit dft_plan(cpu_t cpu, size_t size, dft_order order = dft_order::normal)
        : size(size), temp_size(0), data_size(0)
    {
        if (cpu == cpu_t::runtime)
            cpu = get_cpu();
        switch (cpu)
        {
        case cpu_t::avx512:
            CMT_IF_ENABLED_AVX512(avx512::dft_initialize(*this); break;)
        case cpu_t::avx2:
            CMT_IF_ENABLED_AVX2(avx2::dft_initialize(*this); break;)
        case cpu_t::avx:
            CMT_IF_ENABLED_AVX(avx::dft_initialize(*this); break;)
        case cpu_t::sse42:
        case cpu_t::sse41:
            CMT_IF_ENABLED_SSE41(sse41::dft_initialize(*this); break;)
        case cpu_t::ssse3:
            CMT_IF_ENABLED_SSSE3(ssse3::dft_initialize(*this); break;)
        case cpu_t::sse3:
            CMT_IF_ENABLED_SSE3(sse3::dft_initialize(*this); break;)
        default:
            CMT_IF_ENABLED_SSE2(sse2::dft_initialize(*this); break;);
        }
    }
    explicit dft_plan(size_t size, dft_order order = dft_order::normal)
        :dft_plan(cpu_t::runtime, size, order)
    {
    }
#else
    explicit dft_plan(size_t size, dft_order order = dft_order::normal)
        : size(size), temp_size(0), data_size(0)
    {
        dft_initialize(*this);
    }
#endif


    void dump() const
    {
        for (const std::unique_ptr<dft_stage<T>>& s : stages)
        {
            s->dump();
        }
    }

    KFR_MEM_INTRINSIC void execute(complex<T>* out, const complex<T>* in, u8* temp,
                                   bool inverse = false) const
    {
        if (inverse)
            execute_dft(ctrue, out, in, temp);
        else
            execute_dft(cfalse, out, in, temp);
    }
    ~dft_plan() {}
    template <bool inverse>
    KFR_MEM_INTRINSIC void execute(complex<T>* out, const complex<T>* in, u8* temp,
                                   cbool_t<inverse> inv) const
    {
        execute_dft(inv, out, in, temp);
    }

    template <univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
    KFR_MEM_INTRINSIC void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
                                   univector<u8, Tag3>& temp, bool inverse = false) const
    {
        if (inverse)
            execute_dft(ctrue, out.data(), in.data(), temp.data());
        else
            execute_dft(cfalse, out.data(), in.data(), temp.data());
    }
    template <bool inverse, univector_tag Tag1, univector_tag Tag2, univector_tag Tag3>
    KFR_MEM_INTRINSIC void execute(univector<complex<T>, Tag1>& out, const univector<complex<T>, Tag2>& in,
                                   univector<u8, Tag3>& temp, cbool_t<inverse> inv) const
    {
        execute_dft(inv, out.data(), in.data(), temp.data());
    }

    autofree<u8> data;
    size_t data_size;
    std::vector<dft_stage_ptr<T>> stages;

protected:
    struct noinit
    {
    };
    explicit dft_plan(noinit, size_t size, dft_order order = dft_order::normal)
        : size(size), temp_size(0), data_size(0)
    {
    }
    const complex<T>* select_in(size_t stage, const complex<T>* out, const complex<T>* in,
                                const complex<T>* scratch, bool in_scratch) const
    {
        if (stage == 0)
            return in_scratch ? scratch : in;
        return stages[stage - 1]->to_scratch ? scratch : out;
    }
    complex<T>* select_out(size_t stage, complex<T>* out, complex<T>* scratch) const
    {
        return stages[stage]->to_scratch ? scratch : out;
    }

    template <bool inverse>
    void execute_dft(cbool_t<inverse>, complex<T>* out, const complex<T>* in, u8* temp) const
    {
        if (stages.size() == 1 && (stages[0]->can_inplace || in != out))
        {
            return stages[0]->execute(cbool<inverse>, out, in, temp);
        }
        size_t stack[32] = { 0 };

        complex<T>* scratch = ptr_cast<complex<T>>(
            temp + this->temp_size -
            align_up(sizeof(complex<T>) * this->size, platform<>::native_cache_alignment));

        bool in_scratch = !stages[0]->can_inplace && in == out;
        if (in_scratch)
        {
            builtin_memcpy(scratch, in, sizeof(complex<T>) * this->size);
        }

        const size_t count = stages.size();

        for (size_t depth = 0; depth < count;)
        {
            if (stages[depth]->recursion)
            {
                size_t offset   = 0;
                size_t rdepth   = depth;
                size_t maxdepth = depth;
                do
                {
                    if (stack[rdepth] == stages[rdepth]->repeats)
                    {
                        stack[rdepth] = 0;
                        rdepth--;
                    }
                    else
                    {
                        complex<T>* rout      = select_out(rdepth, out, scratch);
                        const complex<T>* rin = select_in(rdepth, out, in, scratch, in_scratch);
                        stages[rdepth]->execute(cbool<inverse>, rout + offset, rin + offset, temp);
                        offset += stages[rdepth]->out_offset;
                        stack[rdepth]++;
                        if (rdepth < count - 1 && stages[rdepth + 1]->recursion)
                            rdepth++;
                        else
                            maxdepth = rdepth;
                    }
                } while (rdepth != depth);
                depth = maxdepth + 1;
            }
            else
            {
                size_t offset   = 0;
                while (offset < this->size)
                {
                    stages[depth]->execute(cbool<inverse>, select_out(depth, out, scratch) + offset,
                                       select_in(depth, out, in, scratch, in_scratch) + offset, temp);
                    offset += stages[depth]->stage_size;
                }
                depth++;
            }
        }
    }
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/fft.hpp#L135

fft_inverse

fft_inverse class

template <typename E> fft_inverse

[0, N - 1, N - 2, N - 3, ..., 3, 2, 1]

Source code
template <typename E>
struct fft_inverse : internal::expression_with_arguments<E>
{
    using value_type = value_type_of<E>;

    KFR_MEM_INTRINSIC fft_inverse(E&& expr) CMT_NOEXCEPT
        : internal::expression_with_arguments<E>(std::forward<E>(expr))
    {
    }

    friend KFR_INTRINSIC vec<value_type, 1> get_elements(const fft_inverse& self, cinput_t input,
                                                         size_t index, vec_shape<value_type, 1>)
    {
        return self.argument_first(input, index == 0 ? 0 : self.size() - index, vec_shape<value_type, 1>());
    }

    template <size_t N>
    friend KFR_MEM_INTRINSIC vec<value_type, N> get_elements(const fft_inverse& self, cinput_t input,
                                                             size_t index, vec_shape<value_type, N>)
    {
        if (index == 0)
        {
            return concat(
                self.argument_first(input, index, vec_shape<value_type, 1>()),
                reverse(self.argument_first(input, self.size() - (N - 1), vec_shape<value_type, N - 1>())));
        }
        return reverse(self.argument_first(input, self.size() - index - (N - 1), vec_shape<value_type, N>()));
    }
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/impl/dft-impl.hpp#L161

idft function

template <typename T, univector_tag Tag>
univector<complex<T>>
idft(const univector<complex<T>, Tag> &input)

Performs Inverse DFT using cached plan

Source code
template <typename T, univector_tag Tag>
univector<complex<T>> idft(const univector<complex<T>, Tag>& input)
{
    dft_plan_ptr<T> dft = dft_cache::instance().get(ctype_t<T>(), input.size());
    univector<complex<T>> output(input.size(), std::numeric_limits<T>::quiet_NaN());
    univector<u8> temp(dft->temp_size);
    dft->execute(output, input, temp, ctrue);
    return output;
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L141

irealdft function

template <typename T, univector_tag Tag>
univector<T>
irealdft(const univector<complex<T>, Tag> &input)

Permorms Real Inverse DFT using cached plan

Source code
template <typename T, univector_tag Tag>
univector<T> irealdft(const univector<complex<T>, Tag>& input)
{
    dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), (input.size() - 1) * 2);
    univector<T> output((input.size() - 1) * 2, std::numeric_limits<T>::quiet_NaN());
    univector<u8> temp(dft->temp_size);
    dft->execute(output, input, temp);
    return output;
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L163

realdft function

template <typename T, univector_tag Tag>
univector<complex<T>>
realdft(const univector<T, Tag> &input)

Performs Real Direct DFT using cached plan

Source code
template <typename T, univector_tag Tag>
univector<complex<T>> realdft(const univector<T, Tag>& input)
{
    dft_plan_real_ptr<T> dft = dft_cache::instance().getreal(ctype_t<T>(), input.size());
    univector<complex<T>> output(input.size() / 2 + 1, std::numeric_limits<T>::quiet_NaN());
    univector<u8> temp(dft->temp_size);
    dft->execute(output, input, temp);
    return output;
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/cache.hpp#L152

reference_dft function

template <typename Tnumber = double, typename T>
void reference_dft(complex<T> *out, const complex<T> *in,
                   size_t size, bool inversion = false)

Performs Complex DFT using reference implementation (slow, used for testing)

Source code
template <typename Tnumber = double, typename T>
void reference_dft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
{
    using std::cos;
    using std::sin;
    if (is_poweroftwo(size))
    {
        return reference_fft<Tnumber>(out, in, size, inversion);
    }
    constexpr Tnumber pi2 = c_pi<Tnumber, 2>;
    if (size < 2)
        return;
    std::vector<complex<T>> datain;
    if (out == in)
    {
        datain.resize(size);
        std::copy_n(in, size, datain.begin());
        in = datain.data();
    }
    {
        Tnumber sumr = 0;
        Tnumber sumi = 0;
        for (size_t j = 0; j < size; j++)
        {
            sumr += static_cast<Tnumber>(in[j].real());
            sumi += static_cast<Tnumber>(in[j].imag());
        }
        out[0] = { static_cast<T>(sumr), static_cast<T>(sumi) };
    }
    for (size_t i = 1; i < size; i++)
    {
        Tnumber sumr = static_cast<Tnumber>(in[0].real());
        Tnumber sumi = static_cast<Tnumber>(in[0].imag());

        for (size_t j = 1; j < size; j++)
        {
            const Tnumber x = pi2 * ((i * j) % size) / size;
            Tnumber twr     = cos(x);
            Tnumber twi     = sin(x);
            if (inversion)
                twi = -twi;

            sumr += twr * static_cast<Tnumber>(in[j].real()) + twi * static_cast<Tnumber>(in[j].imag());
            sumi += twr * static_cast<Tnumber>(in[j].imag()) - twi * static_cast<Tnumber>(in[j].real());
            out[i] = { static_cast<T>(sumr), static_cast<T>(sumi) };
        }
    }
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L138

template <typename Tnumber = double, typename T>
inline univector<complex<T>>
reference_dft(const univector<complex<T>> &in,
              bool inversion = false)

Performs DFT using reference implementation (slow, used for testing)

Source code
template <typename Tnumber = double, typename T>
inline univector<complex<T>> reference_dft(const univector<complex<T>>& in, bool inversion = false)
{
    univector<complex<T>> out(in.size());
    reference_dft(&out[0], &in[0], in.size(), inversion);
    return out;
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L188

reference_fft function

template <typename Tnumber = double, typename T>
void reference_fft(complex<T> *out, const complex<T> *in,
                   size_t size, bool inversion = false)

Performs Complex FFT using reference implementation (slow, used for testing)

Source code
template <typename Tnumber = double, typename T>
void reference_fft(complex<T>* out, const complex<T>* in, size_t size, bool inversion = false)
{
    using Tcmplx = Tnumber(*)[2];
    if (size < 2)
        return;
    std::vector<complex<Tnumber>> datain(size);
    std::vector<complex<Tnumber>> dataout(size);
    std::vector<complex<Tnumber>> temp(size);
    std::copy(in, in + size, datain.begin());
    const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
    reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
                                Tcmplx(dataout.data()), Tcmplx(temp.data()));
    std::copy(dataout.begin(), dataout.end(), out);
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L84

template <typename Tnumber = double, typename T>
void reference_fft(complex<T> *out, const T *in,
                   size_t size)

Performs Direct Real FFT using reference implementation (slow, used for testing)

Source code
template <typename Tnumber = double, typename T>
void reference_fft(complex<T>* out, const T* in, size_t size)
{
    constexpr bool inversion = false;
    using Tcmplx             = Tnumber(*)[2];
    if (size < 2)
        return;
    std::vector<complex<Tnumber>> datain(size);
    std::vector<complex<Tnumber>> dataout(size);
    std::vector<complex<Tnumber>> temp(size);
    std::copy(in, in + size, datain.begin());
    const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
    reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
                                Tcmplx(dataout.data()), Tcmplx(temp.data()));
    std::copy(dataout.begin(), dataout.end(), out);
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L101

template <typename Tnumber = double, typename T>
void reference_fft(T *out, const complex<T> *in,
                   size_t size)

Performs Inverse Real FFT using reference implementation (slow, used for testing)

Source code
template <typename Tnumber = double, typename T>
void reference_fft(T* out, const complex<T>* in, size_t size)
{
    constexpr bool inversion = true;
    using Tcmplx             = Tnumber(*)[2];
    if (size < 2)
        return;
    std::vector<complex<Tnumber>> datain(size);
    std::vector<complex<Tnumber>> dataout(size);
    std::vector<complex<Tnumber>> temp(size);
    std::copy(in, in + size, datain.begin());
    const Tnumber pi2 = c_pi<Tnumber, 2, 1>;
    reference_fft_pass<Tnumber>(pi2, size, 0, 1, inversion ? -1 : +1, Tcmplx(datain.data()),
                                Tcmplx(dataout.data()), Tcmplx(temp.data()));
    for (size_t i = 0; i < size; i++)
        out[i] = dataout[i].real();
}

https://github.com/kfrlib/kfr/blob//include/kfr/dft/reference_dft.hpp#L119


Auto-generated from sources, Revision , https://github.com/kfrlib/kfr/blob//include/kfr/