This documentation is automatically generated by online-judge-tools/verification-helper
#include "library/polynomial/compose_inv.hpp"形式的冪級数 $\displaystyle f(x) = \sum _ {i = 0} ^ \infty f _ i x ^ i$ の前 $n$ 項が与えられたときに $g(f(x)) = f(g(x)) = x$ を満たす形式的冪級数 $g(x)$ の前 $n$ 項を計算するライブラリ。
ただし $f _ 0 = 0$ および $f _ 1\neq 0$ を仮定する。
$B := \lceil \sqrt{n} \rceil$ と定めると、次が成り立つ。
\[g(f(x)) \equiv \sum _ {i = 0} ^ {n - 1} g _ i (f(x)) ^ i \equiv \sum _ {q = 0} ^ {B - 1}x ^ {qB} ((f(x) / x) ^ B) ^ q \sum _ {r = 0} ^ {B - 1} g _ {qB + r} x ^ r (f(x) / x) ^ {r} \pmod{x ^ n}.\]$h _ r(x) := (f(x) / x) ^ r \bmod x ^ n \ (r=0,1,\ldots,B-1)$ および $H _ q(x) := ((f(x) / x) ^ B) ^ q\bmod x ^ n\ (q=0,1,\ldots,B-1)$ を前計算しておく。この前計算は $O(Bn\log n) = O(n ^ {3/2} \log n)$ 時間で可能である。
$H _ q,h _ r$ を用いて $g(f(x))$ を書き直すと次のようになる。
\[g(f(x)) \equiv \sum _ {q = 0} ^ {B - 1}x ^ {qB} H _ q(x) \sum _ {r = 0} ^ {B - 1} g _ {qB + r} x ^ r h _ r(x) \pmod{x ^ n}.\]係数 $g _ 0, \ldots, g _ {qB - 1}$ の値が既知として、係数 $g _ {qB}, \ldots, g _ {(q + 1)B - 1}$ を得ることを考える。
$g(f(x)) = x$ より $g _ {qB}, \ldots, g _ {(q + 1)B - 1}$ は次を満たす。
\[x ^ {qB} H _ q(x) \sum _ {r = 0} ^ {B - 1} g _ {qB + r} x ^ r h _ r(x) \equiv x - \sum _ {q' = 0} ^ {q - 1}x ^ {q'B} H _ {q'}(x) \sum _ {r = 0} ^ {B - 1} g _ {q'B + r} x ^ r h _ r(x) \pmod{x ^ {(q + 1)B}}.\]$\displaystyle I _ q(x) := x - \sum _ {q' = 0} ^ {q - 1}x ^ {q'B} H _ {q'}(x) \sum _ {r = 0} ^ {B - 1} g _ {q'B + r} x ^ r h _ r(x)$ とおいて整理することで次を得る。
\[\sum _ {r = 0} ^ {B - 1} g _ {qB + r} x ^ r h _ r(x) \equiv \dfrac{1}{H _ q(x)} \cdot \dfrac{I _ q(x)}{x ^ {qB}} \pmod{x ^ B}.\]右辺を改めて $J _ q(x)$ とおく。左辺の $x ^ r$ の係数は $g _ {qB}, \ldots, g _ {qB + r}$ のみから定まり、次が成り立つ。
\[g _ {qB + r} = (\lbrack x ^ 0 \rbrack h _ r(x)) ^ {-1} \left((\lbrack x ^ r \rbrack J _ q(x)) - \sum _ {r' = 0} ^ {r - 1} g _ {qB + r'} (\lbrack x ^ {r - r'}\rbrack h _ {r'}(x)) \right).\]($I _ q(x)$ が得られていると仮定すれば) $J _ q(x) \bmod x ^ B$ の計算は $O(B \log B)$ 時間で可能なので、$g _ {qB}, \ldots, g _ {(q + 1)B - 1}$ の計算は $O(B ^ 2) = O(n)$ 時間となる。
$I _ q(x)$ に関しては $\displaystyle I _ {q + 1}(x) = I _ q(x) - x ^ {qB} H _ q(x) \sum _ {r = 0} ^ {B - 1} g _ {qB + r} x ^ r h _ r(x)$ を用いて更新すれば、各々 $O(Bn + n \log n) = O(n ^ {3/2})$ 時間で計算できる。
結局、全体の計算量は $I _ q$ の更新がボトルネックとなり $O(Bn ^ {3/2}) = O(n ^ 2)$ 時間である。
#ifndef SUISEN_COMPOSE_INV
#define SUISEN_COMPOSE_INV
#include <atcoder/modint>
#include <atcoder/convolution>
#include "library/polynomial/formal_power_series.hpp"
namespace suisen {
// Compositional inverse of a formal power series, computed block by block
// with block size B = floor(sqrt(n)) + 1.
//
// @param f coefficients f_0, f_1, ... of f(x); f_0 must be 0 and f_1 must be nonzero
// @param n number of coefficients of the inverse to compute
// @return g mod x^n s.t. f(g(x))=g(f(x))=x (mod x^n)
template <typename mint>
std::vector<mint> compositional_inv(std::vector<mint> f, const int n) {
    using fps = FormalPowerSeries<mint>;
    // Check the preconditions only on coefficients that are actually present,
    // so that a short `f` is never indexed out of range.
    assert(f.empty() or f[0] == 0);
    assert(f.size() < 2 or f[1] != 0);
    if (n <= 0) return {};
    if (n == 1) return { 0 };
    // Two or more output terms need the linear coefficient f_1.
    assert(f.size() >= 2);

    // From here on `f` holds f(x) / x, truncated (or zero-padded) to n terms.
    f.erase(f.begin());
    f.resize(n);

    const int B = ::sqrt(n) + 1;

    // z: smallest power of two with z >= 2n - 1, i.e. large enough to hold the
    // full product of two length-n series.
    int z = 1;
    while (z < 2 * n - 1) z <<= 1;
    const mint inv_z = mint(z).inv();

    // Keep f in transformed form, pre-multiplied by 1/z so that the scaling is
    // not repeated inside every product.
    f.resize(z);
    atcoder::internal::butterfly(f);
    for (auto& e : f) e *= inv_z;

    // Returns (a * b) mod x^n, where b_fft is the length-z transform of b
    // already multiplied by 1/z.
    auto mul_transformed = [&](std::vector<mint> a, const std::vector<mint>& b_fft) {
        a.resize(z);
        atcoder::internal::butterfly(a);
        for (int j = 0; j < z; ++j) a[j] *= b_fft[j];
        atcoder::internal::butterfly_inv(a);
        a.resize(n);
        return a;
    };

    // h[r] = (f(x) / x)^r mod x^n for r = 0, ..., B
    std::vector<fps> h(B + 1, fps(n));
    h[0][0] = 1;
    for (int i = 1; i <= B; ++i) {
        h[i] = mul_transformed(h[i - 1], f);
    }

    // h[B] is only ever used as a multiplier, so it is transformed in place.
    std::vector<mint>& hB = h[B];
    hB.resize(z);
    atcoder::internal::butterfly(hB);
    for (auto& e : hB) e *= inv_z;

    fps H(n);
    H[0] = 1;

    fps g(B * B);
    fps I(B * B);
    I[1] = 1;
    for (int q = 0; q < B; ++q) {
        // loop invariant
        // - H = H_q = ((f(x) / x)^B)^q mod x^n, I = I_q
        // - g[0,qB) is calculated

        // J = (I_q / x^{qB}) / H_q mod x^B
        fps J = fps(I.begin() + q * B, I.begin() + (q * B + B)) * H.cut_copy(B).inv(B);
        J.resize(B);
        // Solve the triangular system sum_r g_{qB+r} x^r h_r(x) = J(x) (mod x^B)
        // in place: after step r, J[r] holds g_{qB+r}.
        for (int r = 0; r < B; ++r) {
            for (int i = 0; i < r; ++i) {
                J[r] -= J[i] * h[i][r - i];
            }
            J[r] /= h[r][0];
        }
        std::move(J.begin(), J.end(), g.begin() + (B * q));
        if (q == B - 1) break;

        // update I_q: subtract x^{qB} H_q(x) sum_r g_{qB+r} x^r h_r(x)
        const int size_q = n - q * B;
        J.assign(size_q, 0);
        for (int r = 0; r < B; ++r) {
            const mint gr = g[q * B + r];
            for (int k = 0; k < size_q - r; ++k) {
                J[r + k] += gr * h[r][k];
            }
        }
        J *= H.cut_copy(size_q);
        for (int k = 0; k < size_q; ++k) {
            I[q * B + k] -= J[k];
        }

        // update H_q
        H = mul_transformed(H, hB);
    }
    g.resize(n);
    return g;
}
} // namespace suisen
#endif // SUISEN_COMPOSE_INV#line 1 "library/polynomial/compose_inv.hpp"
#include <atcoder/modint>
#include <atcoder/convolution>
#line 1 "library/polynomial/formal_power_series.hpp"
#include <limits>
#include <optional>
#include <queue>
#line 10 "library/polynomial/formal_power_series.hpp"
#line 1 "library/polynomial/fps_naive.hpp"
#include <cassert>
#include <cmath>
#line 7 "library/polynomial/fps_naive.hpp"
#include <type_traits>
#include <vector>
#line 1 "library/type_traits/type_traits.hpp"
#line 5 "library/type_traits/type_traits.hpp"
#include <iostream>
#line 7 "library/type_traits/type_traits.hpp"
namespace suisen {
    // SFINAE helper: names std::nullptr_t when every Constraints::value is true,
    // and is a substitution failure otherwise.
    template <typename ...Constraints> using constraints_t = std::enable_if_t<std::conjunction_v<Constraints...>, std::nullptr_t>;

    // bitnum<T>::value is the bit width of an integral type T and 0 for any other type.
    template <typename T, typename = std::nullptr_t> struct bitnum { static constexpr int value = 0; };
    // numeric_limits<T>::digits counts value bits only, so the sign bit is added
    // back for signed types. std::make_unsigned is deliberately not used: it is
    // ill-formed for bool, which would make bitnum<bool> a hard error.
    template <typename T> struct bitnum<T, constraints_t<std::is_integral<T>>> {
        static constexpr int value = std::numeric_limits<T>::digits + (std::is_signed_v<T> ? 1 : 0);
    };
    template <typename T> inline constexpr int bitnum_v = bitnum<T>::value;

    // is_nbit<T, n>::value is true iff bitnum_v<T> == n.
    template <typename T, size_t n> struct is_nbit { static constexpr bool value = static_cast<size_t>(bitnum_v<T>) == n; };
    template <typename T, size_t n> inline constexpr bool is_nbit_v = is_nbit<T, n>::value;

    // safely_multipliable<T>::type is a type of twice the width for 32-bit and
    // 64-bit integers (same signedness), and T itself otherwise.
    template <typename T, typename = std::nullptr_t> struct safely_multipliable { using type = T; };
    template <typename T> struct safely_multipliable<T, constraints_t<std::is_signed<T>, is_nbit<T, 32>>> { using type = long long; };
    template <typename T> struct safely_multipliable<T, constraints_t<std::is_signed<T>, is_nbit<T, 64>>> { using type = __int128_t; };
    template <typename T> struct safely_multipliable<T, constraints_t<std::is_unsigned<T>, is_nbit<T, 32>>> { using type = unsigned long long; };
    template <typename T> struct safely_multipliable<T, constraints_t<std::is_unsigned<T>, is_nbit<T, 64>>> { using type = __uint128_t; };
    template <typename T> using safely_multipliable_t = typename safely_multipliable<T>::type;

    // rec_value_type<T>::type follows T::value_type recursively until a type
    // without a nested value_type is reached.
    template <typename T, typename = void> struct rec_value_type { using type = T; };
    template <typename T> struct rec_value_type<T, std::void_t<typename T::value_type>> {
        using type = typename rec_value_type<typename T::value_type>::type;
    };
    template <typename T> using rec_value_type_t = typename rec_value_type<T>::type;

    // is_iterable<T>::value: whether e.begin() and e.end() are valid for a value e of type T.
    template <typename T> class is_iterable {
        template <typename T_> static auto test(T_ e) -> decltype(e.begin(), e.end(), std::true_type{});
        static std::false_type test(...);
    public:
        static constexpr bool value = decltype(test(std::declval<T>()))::value;
    };
    template <typename T> inline constexpr bool is_iterable_v = is_iterable<T>::value;

    // is_writable<T>::value: whether `std::ostream& << e` is valid for a value e of type T.
    template <typename T> class is_writable {
        template <typename T_> static auto test(T_ e) -> decltype(std::declval<std::ostream&>() << e, std::true_type{});
        static std::false_type test(...);
    public:
        static constexpr bool value = decltype(test(std::declval<T>()))::value;
    };
    template <typename T> inline constexpr bool is_writable_v = is_writable<T>::value;

    // is_readable<T>::value: whether `std::istream& >> e` is valid for a value e of type T.
    template <typename T> class is_readable {
        template <typename T_> static auto test(T_ e) -> decltype(std::declval<std::istream&>() >> e, std::true_type{});
        static std::false_type test(...);
    public:
        static constexpr bool value = decltype(test(std::declval<T>()))::value;
    };
    template <typename T> inline constexpr bool is_readable_v = is_readable<T>::value;
} // namespace suisen
#line 11 "library/polynomial/fps_naive.hpp"
#line 1 "library/math/modint_extension.hpp"
#line 6 "library/math/modint_extension.hpp"
/**
 * reference: https://37zigen.com/tonelli-shanks-algorithm/
 * calculates x s.t. x^2 = a mod p in O((log p)^2).
 *
 * @param a value whose square root is wanted; p is read from mint::mod()
 * @return some x with x * x == a, or std::nullopt when the Euler-criterion test
 *         a^((p-1)/2) == 1 fails. The modulus is presumably prime, as the
 *         referenced algorithm requires.
 *
 * The modulus-derived values are recomputed on every call instead of being
 * cached in function-local statics, so the result stays correct for modint
 * types whose modulus can change at run time.
 */
template <typename mint>
std::optional<mint> safe_sqrt(mint a) {
    const int p = mint::mod();
    if (a == 0) return std::make_optional(mint(0));
    if (p == 2) return std::make_optional(a);
    if (a.pow((p - 1) / 2) != 1) return std::nullopt;
    // Find some b that fails the same test.
    mint b = 1;
    while (b.pow((p - 1) / 2) == 1) ++b;
    // p - 1 = q * 2^tlz with q odd
    const int tlz = __builtin_ctz(p - 1);
    const int q = (p - 1) >> tlz;
    mint x = a.pow((q + 1) / 2);
    b = b.pow(q);
    const mint inv_a = a.inv();
    for (int shift = 2; x * x != a; ++shift) {
        const mint e = inv_a * x * x;
        if (e.pow(1 << (tlz - shift)) != 1) x *= b;
        b *= b;
    }
    return std::make_optional(x);
}
/**
 * calculates x s.t. x^2 = a mod p in O((log p)^2).
 * if not exists, throws std::bad_optional_access (previously the empty
 * optional was dereferenced, which is undefined behaviour rather than an error).
 */
template <typename mint>
auto sqrt(mint a) -> decltype(mint::mod(), mint()) {
    return safe_sqrt(a).value();
}
// Scalar logarithm for modint-like types: only log(1) = 0 is accepted.
// The trailing return type restricts this overload to types providing mint::mod().
template <typename mint>
auto log(mint a) -> decltype(mint::mod(), mint()) {
    assert(a == 1);
    return mint(0);
}
// Scalar exponential for modint-like types: only exp(0) = 1 is accepted.
// The trailing return type restricts this overload to types providing mint::mod().
template <typename mint>
auto exp(mint a) -> decltype(mint::mod(), mint()) {
    assert(a == 0);
    return mint(1);
}
// Free-function form of mint::pow: returns a raised to the power b.
// The trailing return type restricts this overload to types providing mint::mod().
template <typename mint, typename T>
auto pow(mint a, T b) -> decltype(mint::mod(), mint()) {
    mint powered = a.pow(b);
    return powered;
}
// Free-function form of mint::inv: returns whatever a.inv() yields.
// The trailing return type restricts this overload to types providing mint::mod().
template <typename mint>
auto inv(mint a) -> decltype(mint::mod(), mint()) {
    mint inverse = a.inv();
    return inverse;
}
#line 1 "library/math/inv_mods.hpp"
#line 5 "library/math/inv_mods.hpp"
namespace suisen {
// Lazily grown table of modular inverses, shared by all instances for a given
// mint: invs[i] is built from the recurrence
//   inv(i) = (mod - mod / i) * inv(mod % i).
template <typename mint>
class inv_mods {
public:
    inv_mods() = default;
    // Pre-computes the entries for 0..n.
    inv_mods(int n) {
        ensure(n);
    }
    // Returns the table entry for i, extending the table first if needed.
    // The reference points into the shared table, which a later ensure() may resize.
    const mint& operator[](int i) const {
        ensure(i);
        return invs[i];
    }
    // Makes sure the table holds entries for 0..n.
    static void ensure(int n) {
        int known = invs.size();
        if (known < 2) {
            // Seed values: entry 0 is a placeholder, entry 1 is inv(1).
            invs = { 0, 1 };
            known = 2;
        }
        if (not (known < n + 1)) return;
        invs.resize(n + 1);
        for (int i = known; i <= n; ++i) {
            invs[i] = mint(mod - mod / i) * invs[mod % i];
        }
    }
private:
    static std::vector<mint> invs;
    static constexpr int mod = mint::mod();
};
template <typename mint>
std::vector<mint> inv_mods<mint>::invs{};
// Inverts every element of vs using a single call to inv().
// Every element must be nonzero (asserted).
// @return res with res[i] * vs[i] == 1 for all i
template <typename mint>
std::vector<mint> get_invs(const std::vector<mint>& vs) {
    const int count = vs.size();

    // Product of all elements.
    mint total = 1;
    for (const mint& v : vs) {
        assert(v != 0);
        total *= v;
    }

    // suffix[i] = vs[i] * vs[i + 1] * ... * vs[count - 1]
    std::vector<mint> suffix(count + 1);
    suffix[count] = 1;
    for (int i = count; i-- > 0;) {
        suffix[i] = suffix[i + 1] * vs[i];
    }

    // Invariant: running_inv is the inverse of vs[i] * ... * vs[count - 1],
    // so multiplying by suffix[i + 1] leaves exactly the inverse of vs[i].
    mint running_inv = total.inv();
    std::vector<mint> inverses(count);
    for (int i = 0; i < count; ++i) {
        inverses[i] = running_inv * suffix[i + 1];
        running_inv *= vs[i];
    }
    return inverses;
}
}
#line 14 "library/polynomial/fps_naive.hpp"
namespace suisen {
// Dense formal power series / polynomial whose operations are implemented with
// naive nested loops. Coefficient i is stored at index i of the underlying vector.
template <typename T>
struct FPSNaive : std::vector<T> {
    // Upper bound on the number of coefficients kept by operator* and intg().
    static inline int MAX_SIZE = std::numeric_limits<int>::max() / 2;

    using value_type = T;
    using element_type = rec_value_type_t<T>;
    using std::vector<value_type>::vector;

    FPSNaive(const std::initializer_list<value_type> l) : std::vector<value_type>::vector(l) {}
    FPSNaive(const std::vector<value_type>& v) : std::vector<value_type>::vector(v) {}

    static void set_max_size(int n) {
        FPSNaive<T>::MAX_SIZE = n;
    }

    // Read access: coefficients beyond the stored ones read as 0.
    const value_type operator[](int n) const {
        return n <= deg() ? unsafe_get(n) : value_type{ 0 };
    }
    // Write access: grows the series with zeros so that index n exists.
    value_type& operator[](int n) {
        return ensure_deg(n), unsafe_get(n);
    }

    int size() const {
        return std::vector<value_type>::size();
    }
    // size() - 1; -1 for an empty series.
    int deg() const {
        return size() - 1;
    }
    // Drops trailing zero coefficients and returns the resulting deg().
    int normalize() {
        while (size() and this->back() == value_type{ 0 }) this->pop_back();
        return deg();
    }
    // Keeps at most the first n coefficients (never grows the series).
    FPSNaive& cut_inplace(int n) {
        if (size() > n) this->resize(std::max(0, n));
        return *this;
    }
    FPSNaive cut(int n) const {
        FPSNaive f = FPSNaive(*this).cut_inplace(n);
        return f;
    }

    FPSNaive operator+() const {
        return FPSNaive(*this);
    }
    FPSNaive operator-() const {
        FPSNaive f(*this);
        for (auto& e : f) e = -e;
        return f;
    }

    // Scalar forms act on the constant term only.
    FPSNaive& operator++() { return ++(*this)[0], * this; }
    FPSNaive& operator--() { return --(*this)[0], * this; }
    FPSNaive& operator+=(const value_type x) { return (*this)[0] += x, *this; }
    FPSNaive& operator-=(const value_type x) { return (*this)[0] -= x, *this; }

    FPSNaive& operator+=(const FPSNaive& g) {
        ensure_deg(g.deg());
        for (int i = 0; i <= g.deg(); ++i) unsafe_get(i) += g.unsafe_get(i);
        return *this;
    }
    FPSNaive& operator-=(const FPSNaive& g) {
        ensure_deg(g.deg());
        for (int i = 0; i <= g.deg(); ++i) unsafe_get(i) -= g.unsafe_get(i);
        return *this;
    }
    FPSNaive& operator*=(const FPSNaive& g) { return *this = *this * g; }
    FPSNaive& operator*=(const value_type x) {
        for (auto& e : *this) e *= x;
        return *this;
    }
    FPSNaive& operator/=(const FPSNaive& g) { return *this = *this / g; }
    FPSNaive& operator%=(const FPSNaive& g) { return *this = *this % g; }
    // Multiplies by x^shamt.
    FPSNaive& operator<<=(const int shamt) {
        this->insert(this->begin(), shamt, value_type{ 0 });
        return *this;
    }
    // Divides by x^shamt, discarding the low coefficients.
    FPSNaive& operator>>=(const int shamt) {
        if (shamt > size()) this->clear();
        else this->erase(this->begin(), this->begin() + shamt);
        return *this;
    }

    friend FPSNaive operator+(FPSNaive f, const FPSNaive& g) { f += g; return f; }
    friend FPSNaive operator+(FPSNaive f, const value_type& x) { f += x; return f; }
    friend FPSNaive operator-(FPSNaive f, const FPSNaive& g) { f -= g; return f; }
    friend FPSNaive operator-(FPSNaive f, const value_type& x) { f -= x; return f; }
    // Full product, truncated to at most MAX_SIZE coefficients.
    friend FPSNaive operator*(const FPSNaive& f, const FPSNaive& g) {
        if (f.empty() or g.empty()) return FPSNaive{};
        const int n = f.size(), m = g.size();
        FPSNaive h(std::min(MAX_SIZE, n + m - 1));
        for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) {
            if (i + j >= MAX_SIZE) break;
            h.unsafe_get(i + j) += f.unsafe_get(i) * g.unsafe_get(j);
        }
        return h;
    }
    friend FPSNaive operator*(FPSNaive f, const value_type& x) { f *= x; return f; }
    // Polynomial quotient and remainder, see div_mod().
    friend FPSNaive operator/(FPSNaive f, const FPSNaive& g) { return std::move(f.div_mod(g).first); }
    friend FPSNaive operator%(FPSNaive f, const FPSNaive& g) { return std::move(f.div_mod(g).second); }

    friend FPSNaive operator*(const value_type x, FPSNaive f) { f *= x; return f; }
    friend FPSNaive operator<<(FPSNaive f, const int shamt) { f <<= shamt; return f; }
    friend FPSNaive operator>>(FPSNaive f, const int shamt) { f >>= shamt; return f; }

    // Polynomial long division: returns { q, r } with *this = q * g + r.
    // g must have at least one nonzero coefficient (asserted).
    std::pair<FPSNaive, FPSNaive> div_mod(FPSNaive g) const {
        FPSNaive f = *this;
        const int fd = f.normalize(), gd = g.normalize();
        assert(gd >= 0);
        if (fd < gd) return { FPSNaive{}, f };
        if (gd == 0) return { f *= g.unsafe_get(0).inv(), FPSNaive{} };
        const int k = f.deg() - gd;
        value_type head_inv = g.unsafe_get(gd).inv();
        FPSNaive q(k + 1);
        for (int i = k; i >= 0; --i) {
            value_type div = f.unsafe_get(i + gd) * head_inv;
            q.unsafe_get(i) = div;
            for (int j = 0; j <= gd; ++j) f.unsafe_get(i + j) -= div * g.unsafe_get(j);
        }
        f.cut_inplace(gd);
        f.normalize();
        return { q, f };
    }

    // Equality up to trailing zeros.
    friend bool operator==(const FPSNaive& f, const FPSNaive& g) {
        const int n = f.size(), m = g.size();
        if (n < m) return g == f;
        for (int i = 0; i < m; ++i) if (f.unsafe_get(i) != g.unsafe_get(i)) return false;
        for (int i = m; i < n; ++i) if (f.unsafe_get(i) != 0) return false;
        return true;
    }
    friend bool operator!=(const FPSNaive& f, const FPSNaive& g) {
        return not (f == g);
    }

    // Product truncated to the first n coefficients (n < 0: size() of *this).
    FPSNaive mul(const FPSNaive& g, int n = -1) const {
        if (n < 0) n = size();
        if (this->empty() or g.empty()) return FPSNaive{};
        const int m = size(), k = g.size();
        FPSNaive h(std::min(n, m + k - 1));
        for (int i = 0; i < m; ++i) {
            for (int j = 0, jr = std::min(k, n - i); j < jr; ++j) {
                h.unsafe_get(i + j) += unsafe_get(i) * g.unsafe_get(j);
            }
        }
        return h;
    }
    // Formal derivative.
    FPSNaive diff() const {
        if (this->empty()) return {};
        FPSNaive g(size() - 1);
        for (int i = 1; i <= deg(); ++i) g.unsafe_get(i - 1) = unsafe_get(i) * i;
        return g;
    }
    // Formal integral with constant term 0, kept within MAX_SIZE coefficients.
    FPSNaive intg() const {
        const int n = size();
        FPSNaive g(n + 1);
        for (int i = 0; i < n; ++i) g.unsafe_get(i + 1) = unsafe_get(i) * invs[i + 1];
        // cut_inplace only shrinks, so this caps the size at MAX_SIZE exactly
        // (the previous `deg() > MAX_SIZE` test let MAX_SIZE + 1 coefficients through).
        g.cut_inplace(MAX_SIZE);
        return g;
    }

    // First n coefficients of 1 / f (n < 0: size() of *this).
    FPSNaive inv(int n = -1) const {
        if (n < 0) n = size();
        // No coefficient requested: nothing may be written to g.
        if (n == 0) return FPSNaive{};
        FPSNaive g(n);
        // Bounds-checked read, so an empty series is treated as constant term 0.
        const value_type inv_f0 = ::inv((*this)[0]);
        g.unsafe_get(0) = inv_f0;
        for (int i = 1; i < n; ++i) {
            for (int j = 1; j <= i; ++j) g.unsafe_get(i) -= g.unsafe_get(i - j) * (*this)[j];
            g.unsafe_get(i) *= inv_f0;
        }
        return g;
    }
    // First n coefficients of exp(f); the constant term of f must be 0 (asserted).
    FPSNaive exp(int n = -1) const {
        if (n < 0) n = size();
        if (n == 0) return FPSNaive{};
        assert((*this)[0] == value_type{ 0 });
        FPSNaive g(n);
        g.unsafe_get(0) = value_type{ 1 };
        for (int i = 1; i < n; ++i) {
            for (int j = 1; j <= i; ++j) g.unsafe_get(i) += j * g.unsafe_get(i - j) * (*this)[j];
            g.unsafe_get(i) *= invs[i];
        }
        return g;
    }
    // First n coefficients of log(f); the constant term of f must be 1 (asserted).
    FPSNaive log(int n = -1) const {
        if (n < 0) n = size();
        if (n == 0) return FPSNaive{};
        assert((*this)[0] == value_type{ 1 });
        FPSNaive g(n);
        g.unsafe_get(0) = value_type{ 0 };
        for (int i = 1; i < n; ++i) {
            g.unsafe_get(i) = i * (*this)[i];
            for (int j = 1; j < i; ++j) g.unsafe_get(i) -= (i - j) * g.unsafe_get(i - j) * (*this)[j];
            g.unsafe_get(i) *= invs[i];
        }
        return g;
    }
    // First n coefficients of f^k (n < 0: size() of *this).
    FPSNaive pow(const long long k, int n = -1) const {
        if (n < 0) n = size();
        if (k == 0) {
            FPSNaive res(n);
            res[0] = 1;
            return res;
        }
        // No coefficient requested: avoid writing to an empty result below.
        if (n == 0) return FPSNaive{};
        // z: index of the lowest nonzero coefficient
        int z = 0;
        while (z < size() and unsafe_get(z) == value_type{ 0 }) ++z;
        // f == 0, or the lowest term x^{zk} of f^k already lies beyond x^{n-1}
        if (z == size() or z > (n - 1) / k) return FPSNaive(n, 0);
        const int m = n - z * k;

        FPSNaive g(m);
        const value_type inv_f0 = ::inv(unsafe_get(z));
        g.unsafe_get(0) = unsafe_get(z).pow(k);
        for (int i = 1; i < m; ++i) {
            for (int j = 1; j <= i; ++j) g.unsafe_get(i) += (element_type{ k } *j - (i - j)) * g.unsafe_get(i - j) * (*this)[z + j];
            g.unsafe_get(i) *= inv_f0 * invs[i];
        }
        g <<= z * k;
        return g;
    }

    // First n coefficients of a square root of f, or std::nullopt when the
    // lowest nonzero term has odd degree or its coefficient has no square root.
    std::optional<FPSNaive> safe_sqrt(int n = -1) const {
        if (n < 0) n = size();
        // dl: index of the lowest nonzero coefficient
        int dl = 0;
        while (dl < size() and unsafe_get(dl) == value_type{ 0 }) ++dl;
        if (dl == size()) return FPSNaive(n, 0);
        if (dl & 1) return std::nullopt;
        // The root starts at x^{dl/2}; if that is already beyond x^{n-1} the
        // requested prefix is all zeros (and m below would not be positive).
        if (dl / 2 >= n) return FPSNaive(n, 0);

        const int m = n - dl / 2;

        FPSNaive g(m);
        auto opt_g0 = ::safe_sqrt((*this)[dl]);
        if (not opt_g0.has_value()) return std::nullopt;
        g.unsafe_get(0) = *opt_g0;
        value_type inv_2g0 = ::inv(2 * g.unsafe_get(0));
        for (int i = 1; i < m; ++i) {
            g.unsafe_get(i) = (*this)[dl + i];
            for (int j = 1; j < i; ++j) g.unsafe_get(i) -= g.unsafe_get(j) * g.unsafe_get(i - j);
            g.unsafe_get(i) *= inv_2g0;
        }
        g <<= dl / 2;
        return g;
    }
    // Like safe_sqrt(), but throws std::bad_optional_access when no root exists
    // (previously the empty optional was dereferenced).
    FPSNaive sqrt(int n = -1) const {
        if (n < 0) n = size();
        return safe_sqrt(n).value();
    }

    // Evaluates the polynomial at x with Horner's rule.
    value_type eval(value_type x) const {
        value_type y = 0;
        for (int i = size() - 1; i >= 0; --i) y = y * x + unsafe_get(i);
        return y;
    }

private:
    static inline inv_mods<element_type> invs;

    // Grows the series with zeros so that index d exists.
    void ensure_deg(int d) {
        if (deg() < d) this->resize(d + 1, value_type{ 0 });
    }

    // Direct access to the underlying vector, without growth or range check.
    const value_type& unsafe_get(int i) const {
        return std::vector<value_type>::operator[](i);
    }
    value_type& unsafe_get(int i) {
        return std::vector<value_type>::operator[](i);
    }
};
} // namespace suisen
// Free-function form of FPSNaive::sqrt(), called with its default length argument.
template <typename mint>
suisen::FPSNaive<mint> sqrt(suisen::FPSNaive<mint> a) {
    suisen::FPSNaive<mint> root = a.sqrt();
    return root;
}
// Free-function form of FPSNaive::log(), called with its default length argument.
template <typename mint>
suisen::FPSNaive<mint> log(suisen::FPSNaive<mint> a) {
    suisen::FPSNaive<mint> logarithm = a.log();
    return logarithm;
}
// Free-function form of FPSNaive::exp(), called with its default length argument.
template <typename mint>
suisen::FPSNaive<mint> exp(suisen::FPSNaive<mint> a) {
    suisen::FPSNaive<mint> exponential = a.exp();
    return exponential;
}
// Free-function form of FPSNaive::pow(b), called with its default length argument.
template <typename mint, typename T>
suisen::FPSNaive<mint> pow(suisen::FPSNaive<mint> a, T b) {
    suisen::FPSNaive<mint> powered = a.pow(b);
    return powered;
}
// Free-function form of FPSNaive::inv(), called with its default length argument.
template <typename mint>
suisen::FPSNaive<mint> inv(suisen::FPSNaive<mint> a) {
    suisen::FPSNaive<mint> inverse = a.inv();
    return inverse;
}
#line 14 "library/polynomial/formal_power_series.hpp"
namespace suisen {
template <typename mint, atcoder::internal::is_static_modint_t<mint>* = nullptr>
struct FormalPowerSeries : std::vector<mint> {
using base_type = std::vector<mint>;
using value_type = typename base_type::value_type;
using base_type::vector;
FormalPowerSeries(const std::initializer_list<value_type> l) : std::vector<value_type>::vector(l) {}
FormalPowerSeries(const std::vector<value_type>& v) : std::vector<value_type>::vector(v) {}
int size() const noexcept {
return base_type::size();
}
int deg() const noexcept {
return size() - 1;
}
void ensure(int n) {
if (size() < n) this->resize(n);
}
value_type safe_get(int d) const {
return d <= deg() ? (*this)[d] : 0;
}
value_type& safe_get(int d) {
ensure(d + 1);
return (*this)[d];
}
FormalPowerSeries& cut_trailing_zeros() {
while (size() and this->back() == 0) this->pop_back();
return *this;
}
FormalPowerSeries& cut(int n) {
if (size() > n) this->resize(std::max(0, n));
return *this;
}
FormalPowerSeries cut_copy(int n) const {
FormalPowerSeries res(this->begin(), this->begin() + std::min(size(), n));
res.ensure(n);
return res;
}
FormalPowerSeries cut_copy(int l, int r) const {
if (l >= size()) return FormalPowerSeries(r - l, 0);
FormalPowerSeries res(this->begin() + l, this->begin() + std::min(size(), r));
res.ensure(r - l);
return res;
}
/* Unary Operations */
FormalPowerSeries operator+() const { return *this; }
FormalPowerSeries operator-() const {
FormalPowerSeries res = *this;
for (auto& e : res) e = -e;
return res;
}
FormalPowerSeries& operator++() { return ++safe_get(0), * this; }
FormalPowerSeries& operator--() { return --safe_get(0), * this; }
FormalPowerSeries operator++(int) {
FormalPowerSeries res = *this;
++(*this);
return res;
}
FormalPowerSeries operator--(int) {
FormalPowerSeries res = *this;
--(*this);
return res;
}
/* Binary Operations With Constant */
FormalPowerSeries& operator+=(const value_type& x) { return safe_get(0) += x, *this; }
FormalPowerSeries& operator-=(const value_type& x) { return safe_get(0) -= x, *this; }
FormalPowerSeries& operator*=(const value_type& x) {
for (auto& e : *this) e *= x;
return *this;
}
FormalPowerSeries& operator/=(const value_type& x) { return *this *= x.inv(); }
friend FormalPowerSeries operator+(FormalPowerSeries f, const value_type& x) { f += x; return f; }
friend FormalPowerSeries operator+(const value_type& x, FormalPowerSeries f) { f += x; return f; }
friend FormalPowerSeries operator-(FormalPowerSeries f, const value_type& x) { f -= x; return f; }
friend FormalPowerSeries operator-(const value_type& x, FormalPowerSeries f) { f -= x; return -f; }
friend FormalPowerSeries operator*(FormalPowerSeries f, const value_type& x) { f *= x; return f; }
friend FormalPowerSeries operator*(const value_type& x, FormalPowerSeries f) { f *= x; return f; }
friend FormalPowerSeries operator/(FormalPowerSeries f, const value_type& x) { f /= x; return f; }
/* Binary Operations With Formal Power Series */
FormalPowerSeries& operator+=(const FormalPowerSeries& g) {
const int n = g.size();
ensure(n);
for (int i = 0; i < n; ++i) (*this)[i] += g[i];
return *this;
}
FormalPowerSeries& operator-=(const FormalPowerSeries& g) {
const int n = g.size();
ensure(n);
for (int i = 0; i < n; ++i) (*this)[i] -= g[i];
return *this;
}
FormalPowerSeries& operator*=(const FormalPowerSeries& g) { return *this = *this * g; }
FormalPowerSeries& operator/=(const FormalPowerSeries& g) { return *this = *this / g; }
FormalPowerSeries& operator%=(const FormalPowerSeries& g) { return *this = *this % g; }
friend FormalPowerSeries operator+(FormalPowerSeries f, const FormalPowerSeries& g) { f += g; return f; }
friend FormalPowerSeries operator-(FormalPowerSeries f, const FormalPowerSeries& g) { f -= g; return f; }
friend FormalPowerSeries operator*(const FormalPowerSeries& f, const FormalPowerSeries& g) {
const int size_f = f.size(), size_g = g.size();
if (size_f < size_g) return g * f;
if (std::min(size_f, size_g) <= 60) return atcoder::convolution(f, g);
const int deg = size_f + size_g - 2;
int fpow2 = 1;
while ((fpow2 << 1) <= deg) fpow2 <<= 1;
if (const int dif = deg - fpow2 + 1; dif <= 10) {
FormalPowerSeries h = atcoder::convolution(std::vector<mint>(f.begin(), f.end() - dif), g);
h.resize(h.size() + dif);
for (int i = size_f - dif; i < size_f; ++i) for (int j = 0; j < size_g; ++j) {
h[i + j] += f[i] * g[j];
}
return h;
}
return atcoder::convolution(f, g);
}
friend FormalPowerSeries operator/(FormalPowerSeries f, FormalPowerSeries g) {
if (f.size() < 60) return FPSNaive<mint>(f).div_mod(g).first;
f.cut_trailing_zeros(), g.cut_trailing_zeros();
const int fd = f.deg(), gd = g.deg();
assert(gd >= 0);
if (fd < gd) return {};
if (gd == 0) {
f /= g[0];
return f;
}
std::reverse(f.begin(), f.end()), std::reverse(g.begin(), g.end());
const int qd = fd - gd;
f.cut(qd + 1);
FormalPowerSeries q = f * g.inv(qd + 1);
q.cut(qd + 1);
std::reverse(q.begin(), q.end());
return q;
}
friend FormalPowerSeries operator%(const FormalPowerSeries& f, const FormalPowerSeries& g) { return f.div_mod(g).second; }
std::pair<FormalPowerSeries, FormalPowerSeries> div_mod(const FormalPowerSeries& g) const {
if (size() < 60) {
auto [q, r] = FPSNaive<mint>(*this).div_mod(g);
return { q, r };
}
FormalPowerSeries q = *this / g, r = *this - g * q;
r.cut_trailing_zeros();
return { q, r };
}
/* Shift Operations */
FormalPowerSeries& operator<<=(const int shamt) {
return this->insert(this->begin(), shamt, 0), * this;
}
FormalPowerSeries& operator>>=(const int shamt) {
return this->erase(this->begin(), this->begin() + std::min(shamt, size())), * this;
}
friend FormalPowerSeries operator<<(FormalPowerSeries f, const int shamt) { f <<= shamt; return f; }
friend FormalPowerSeries operator>>(FormalPowerSeries f, const int shamt) { f >>= shamt; return f; }
/* Compare */
friend bool operator==(const FormalPowerSeries& f, const FormalPowerSeries& g) {
const int n = f.size(), m = g.size();
if (n < m) return g == f;
for (int i = 0; i < m; ++i) if (f[i] != g[i]) return false;
for (int i = m; i < n; ++i) if (f[i] != 0) return false;
return true;
}
friend bool operator!=(const FormalPowerSeries& f, const FormalPowerSeries& g) { return not (f == g); }
/* Other Operations */
FormalPowerSeries& diff_inplace() {
if (this->empty()) return *this;
const int n = size();
for (int i = 1; i < n; ++i) (*this)[i - 1] = (*this)[i] * i;
return (*this)[n - 1] = 0, *this;
}
FormalPowerSeries diff() const {
FormalPowerSeries res = *this;
res.diff_inplace();
return res;
}
FormalPowerSeries& intg_inplace() {
const int n = size();
inv_mods<value_type> invs(n);
this->resize(n + 1);
for (int i = n; i > 0; --i) (*this)[i] = (*this)[i - 1] * invs[i];
return (*this)[0] = 0, *this;
}
FormalPowerSeries intg() const {
FormalPowerSeries res = *this;
res.intg_inplace();
return res;
}
FormalPowerSeries& inv_inplace(int n = -1) { return *this = inv(n); }
// reference: https://opt-cp.com/fps-fast-algorithms/
FormalPowerSeries inv(int n = -1) const {
if (n < 0) n = size();
if (n < 60) return FPSNaive<mint>(cut_copy(n)).inv();
if (auto sp_f = sparse_fps_format(15); sp_f.has_value()) return inv_sparse(std::move(*sp_f), n);
FormalPowerSeries f_fft, g_fft;
FormalPowerSeries g{ (*this)[0].inv() };
for (int k = 1; k < n; k *= 2) {
f_fft = cut_copy(2 * k), g_fft = g.cut_copy(2 * k);
atcoder::internal::butterfly(f_fft);
atcoder::internal::butterfly(g_fft);
update_inv(k, f_fft, g_fft, g);
}
g.resize(n);
return g;
}
FormalPowerSeries& log_inplace(int n = -1) { return *this = log(n); }
FormalPowerSeries log(int n = -1) const {
assert(safe_get(0) == 1);
if (n < 0) n = size();
if (n < 60) return FPSNaive<mint>(cut_copy(n)).log();
if (auto sp_f = sparse_fps_format(15); sp_f.has_value()) return log_sparse(std::move(*sp_f), n);
FormalPowerSeries res = inv(n) * diff();
res.resize(n - 1);
return res.intg();
}
FormalPowerSeries& exp_inplace(int n = -1) { return *this = exp(n); }
// https://arxiv.org/pdf/1301.5804.pdf
FormalPowerSeries exp(int n = -1) const {
assert(safe_get(0) == 0);
if (n < 0) n = size();
if (n < 60) return FPSNaive<mint>(cut_copy(n)).exp();
if (auto sp_f = sparse_fps_format(15); sp_f.has_value()) return exp_sparse(std::move(*sp_f), n);
// h = *this
// f = exp(h) mod x ^ k
// g = f^{-1} mod x ^ k
FormalPowerSeries dh = diff();
FormalPowerSeries f{ 1 }, f_fft;
FormalPowerSeries g{ 1 }, g_fft;
for (int k = 1; k < n; k *= 2) {
f_fft = f.cut_copy(2 * k), atcoder::internal::butterfly(f_fft);
if (k > 1) update_inv(k / 2, f_fft, g_fft, g);
FormalPowerSeries t = f.cut_copy(k);
t.diff_inplace();
{
FormalPowerSeries r = dh.cut_copy(k);
r.back() = 0;
atcoder::internal::butterfly(r);
for (int i = 0; i < k; ++i) r[i] *= f_fft[i];
atcoder::internal::butterfly_inv(r);
r /= -k;
t += r;
t <<= 1, t[0] = t[k], t.pop_back();
}
t.resize(2 * k);
atcoder::internal::butterfly(t);
g_fft = g.cut_copy(2 * k);
atcoder::internal::butterfly(g_fft);
for (int i = 0; i < 2 * k; ++i) t[i] *= g_fft[i];
atcoder::internal::butterfly_inv(t);
t.resize(k);
t /= 2 * k;
FormalPowerSeries v = cut_copy(2 * k) >>= k;
t <<= k - 1;
t.intg_inplace();
for (int i = 0; i < k; ++i) v[i] -= t[k + i];
v.resize(2 * k);
atcoder::internal::butterfly(v);
for (int i = 0; i < 2 * k; ++i) v[i] *= f_fft[i];
atcoder::internal::butterfly_inv(v);
v.resize(k);
v /= 2 * k;
f.resize(2 * k);
for (int i = 0; i < k; ++i) f[k + i] = v[i];
}
f.cut(n);
return f;
}
FormalPowerSeries& pow_inplace(long long k, int n = -1) { return *this = pow(k, n); }
FormalPowerSeries pow(const long long k, int n = -1) const {
if (n < 0) n = size();
if (n < 60) return FPSNaive<mint>(cut_copy(n)).pow(k);
if (auto sp_f = sparse_fps_format(15); sp_f.has_value()) return pow_sparse(std::move(*sp_f), k, n);
if (k == 0) {
FormalPowerSeries f{ 1 };
f.resize(n);
return f;
}
int tlz = 0;
while (tlz < size() and (*this)[tlz] == 0) ++tlz;
if (tlz == size() or tlz > (n - 1) / k) return FormalPowerSeries(n, 0);
const int m = n - tlz * k;
FormalPowerSeries f = *this >> tlz;
value_type base = f[0];
return ((((f /= base).log(m) *= k).exp(m) *= base.pow(k)) <<= (tlz * k));
}
std::optional<FormalPowerSeries> safe_sqrt(int n = -1) const {
if (n < 0) n = size();
if (n < 60) return FPSNaive<mint>(cut_copy(n)).safe_sqrt();
if (auto sp_f = sparse_fps_format(15); sp_f.has_value()) return safe_sqrt_sparse(std::move(*sp_f), n);
int tlz = 0;
while (tlz < size() and (*this)[tlz] == 0) ++tlz;
if (tlz == size()) return FormalPowerSeries(n, 0);
if (tlz & 1) return std::nullopt;
const int m = n - tlz / 2;
FormalPowerSeries h(this->begin() + tlz, this->end());
auto q0 = ::safe_sqrt(h[0]);
if (not q0.has_value()) return std::nullopt;
FormalPowerSeries f{ *q0 }, f_fft, g{ q0->inv() }, g_fft;
for (int k = 1; k < m; k *= 2) {
f_fft = f.cut_copy(2 * k), atcoder::internal::butterfly(f_fft);
if (k > 1) update_inv(k / 2, f_fft, g_fft, g);
g_fft = g.cut_copy(2 * k);
atcoder::internal::butterfly(g_fft);
FormalPowerSeries h_fft = h.cut_copy(2 * k);
atcoder::internal::butterfly(h_fft);
for (int i = 0; i < 2 * k; ++i) h_fft[i] = (h_fft[i] - f_fft[i] * f_fft[i]) * g_fft[i];
atcoder::internal::butterfly_inv(h_fft);
f.resize(2 * k);
const value_type inv_scale = value_type(4 * k).inv();
for (int i = 0; i < k; ++i) f[k + i] = h_fft[k + i] * inv_scale;
}
f.resize(m), f <<= (tlz / 2);
return f;
}
FormalPowerSeries& sqrt_inplace(int n = -1) { return *this = sqrt(n); }
FormalPowerSeries sqrt(int n = -1) const {
return *safe_sqrt(n);
}
value_type eval(value_type x) const {
value_type y = 0;
for (int i = size() - 1; i >= 0; --i) y = y * x + (*this)[i];
return y;
}
static FormalPowerSeries prod(const std::vector<FormalPowerSeries>& fs) {
if (fs.empty()) return { 1 };
std::deque<FormalPowerSeries> dq(fs.begin(), fs.end());
std::sort(dq.begin(), dq.end(), [](auto& f, auto& g) { return f.size() < g.size(); });
while (dq.size() >= 2) {
dq.push_back(dq[0] * dq[1]);
dq.pop_front();
dq.pop_front();
}
return dq.front();
}
std::optional<std::vector<std::pair<int, value_type>>> sparse_fps_format(int max_size) const {
std::vector<std::pair<int, value_type>> res;
for (int i = 0; i <= deg() and int(res.size()) <= max_size; ++i) if (value_type v = (*this)[i]; v != 0) res.emplace_back(i, v);
if (int(res.size()) > max_size) return std::nullopt;
return res;
}
private:
static void update_inv(const int k, FormalPowerSeries& f_fft, FormalPowerSeries& g_fft, FormalPowerSeries& g) {
FormalPowerSeries fg(2 * k);
for (int i = 0; i < 2 * k; ++i) fg[i] = f_fft[i] * g_fft[i];
atcoder::internal::butterfly_inv(fg);
fg >>= k, fg.resize(2 * k);
atcoder::internal::butterfly(fg);
for (int i = 0; i < 2 * k; ++i) fg[i] *= g_fft[i];
atcoder::internal::butterfly_inv(fg);
const value_type inv_scale = value_type(2 * k).inv(), c = -inv_scale * inv_scale;
g.resize(2 * k);
for (int i = 0; i < k; ++i) g[k + i] = fg[i] * c;
}
static FormalPowerSeries div_fps_sparse(const FormalPowerSeries& f, const std::vector<std::pair<int, value_type>>& g, int n) {
const int size = g.size();
assert(size and g[0].first == 0);
const value_type inv_g0 = g[0].second.inv();
FormalPowerSeries h(n);
for (int i = 0; i < n; ++i) {
value_type v = f.safe_get(i);
for (int idx = 1; idx < size; ++idx) {
const auto& [j, gj] = g[idx];
if (j > i) break;
v -= gj * h[i - j];
}
h[i] = v * inv_g0;
}
return h;
}
static FormalPowerSeries inv_sparse(const std::vector<std::pair<int, value_type>>& g, const int n) {
return div_fps_sparse(FormalPowerSeries{ 1 }, g, n);
}
// Computes the first n coefficients of exp(f) for a sparsely represented f.
// Uses the recurrence i * g_i = sum_j j * f_j * g_{i - j}.
// @param f (index, value) pairs of the non-zero terms; the first listed term must not be the
//          constant term, and the early exit of the inner loop relies on increasing indices
// @param n number of coefficients to produce
// @return series g of length n with g[0] = 1 (empty when n == 0)
static FormalPowerSeries exp_sparse(const std::vector<std::pair<int, value_type>>& f, const int n) {
    assert(f.empty() or f[0].first != 0);
    FormalPowerSeries g(n);
    // With n == 0 there is no coefficient to fill in; writing g[0] would be out of bounds.
    if (n == 0) return g;
    g[0] = 1;
    // invs[i] is used as the multiplicative inverse of i.
    inv_mods<value_type> invs(n);
    for (int i = 1; i < n; ++i) {
        value_type v = 0;
        for (const auto& [j, fj] : f) {
            if (j > i) break;
            v += j * fj * g[i - j];
        }
        v *= invs[i];
        g[i] = v;
    }
    return g;
}
// Computes the first n coefficients of log(f) for a sparsely represented f.
// Uses the recurrence i * g_i = i * f_i - sum_{j >= 1} f_j * (i - j) * g_{i - j}.
// @param f (index, value) pairs of the non-zero terms; f[0] must be the constant term and equal 1,
//          and the early exits below rely on increasing indices
// @param n number of coefficients to produce
// @return series of length n
static FormalPowerSeries log_sparse(const std::vector<std::pair<int, value_type>>& f, const int n) {
    const int term_count = f.size();
    assert(term_count and f[0].first == 0 and f[0].second == 1);
    FormalPowerSeries res(n);
    // Seed res[j] with j * f_j for every listed term that fits below n.
    for (int t = 1; t < term_count and f[t].first < n; ++t) {
        res[f[t].first] = f[t].first * f[t].second;
    }
    // invs[i] is used as the multiplicative inverse of i.
    inv_mods<value_type> invs(n);
    for (int i = 1; i < n; ++i) {
        value_type acc = res[i];
        for (int t = 1; t < term_count and f[t].first <= i; ++t) {
            const int j = f[t].first;
            acc -= f[t].second * res[i - j] * (i - j);
        }
        acc *= invs[i];
        res[i] = acc;
    }
    return res;
}
// Computes the first n coefficients of f^k for a sparsely represented f.
// Writing f = x^p * (f_p + ...), the result starts at index p * k and the remaining coefficients
// follow from i * f_p * g_i = sum_{j >= 1} f_j * (k * j - (i - j)) * g_{i - j} (indices relative to p).
// @param f (index, value) pairs of the non-zero terms; the first pair is treated as the lowest term,
//          and the early exit of the inner loop relies on increasing indices
// @param k exponent; the code divides by k and calls pow(k), so a non-negative k is assumed — TODO confirm
// @param n number of coefficients to produce
// @return series of length n (empty when n == 0)
static FormalPowerSeries pow_sparse(const std::vector<std::pair<int, value_type>>& f, const long long k, const int n) {
    // Nothing to compute mod x^0; every branch below would otherwise write into an empty series.
    if (n == 0) return FormalPowerSeries(n, 0);
    if (k == 0) {
        FormalPowerSeries res(n, 0);
        res[0] = 1;
        return res;
    }
    const int size = f.size();
    if (not size) return FormalPowerSeries(n, 0);
    const int p = f[0].first;
    // p * k >= n means every term of the power lies at or beyond x^n.
    if (p > (n - 1) / k) return FormalPowerSeries(n, 0);
    const value_type inv_f0 = f[0].second.inv();
    const int lz = p * k;
    FormalPowerSeries g(n);
    g[lz] = f[0].second.pow(k);
    // invs[i] is used as the multiplicative inverse of i.
    inv_mods<value_type> invs(n);
    for (int i = 1; lz + i < n; ++i) {
        value_type v = 0;
        for (int idx = 1; idx < size; ++idx) {
            auto [j, fj] = f[idx];
            j -= p;
            if (j > i) break;
            v += fj * g[lz + i - j] * (value_type(k) * j - (i - j));
        }
        v *= invs[i] * inv_f0;
        g[lz + i] = v;
    }
    return g;
}
static std::optional<FormalPowerSeries> safe_sqrt_sparse(const std::vector<std::pair<int, value_type>>& f, const int n) {
const int size = f.size();
if (not size) return FormalPowerSeries(n, 0);
const int p = f[0].first;
if (p % 2 == 1) return std::nullopt;
if (p / 2 >= n) return FormalPowerSeries(n, 0);
const value_type inv_f0 = f[0].second.inv();
const int lz = p / 2;
FormalPowerSeries g(n);
auto opt_g0 = ::safe_sqrt(f[0].second);
if (not opt_g0.has_value()) return std::nullopt;
g[lz] = *opt_g0;
value_type k = mint(2).inv();
inv_mods<value_type> invs(n);
for (int i = 1; lz + i < n; ++i) {
value_type v = 0;
for (int idx = 1; idx < size; ++idx) {
auto [j, fj] = f[idx];
j -= p;
if (j > i) break;
v += fj * g[lz + i - j] * (k * j - (i - j));
}
v *= invs[i] * inv_f0;
g[lz + i] = v;
}
return g;
}
// Computes the first n coefficients of a square root of a sparsely represented f.
// The caller must ensure a root exists: the optional returned by safe_sqrt_sparse is
// dereferenced without a check.
// @param f (index, value) pairs, passed on unchanged to safe_sqrt_sparse
// @param n number of coefficients to produce
// @return the series held by safe_sqrt_sparse(f, n)
static FormalPowerSeries sqrt_sparse(const std::vector<std::pair<int, value_type>>& f, const int n) {
    // Dispatch to the sparse routine; the member safe_sqrt takes only a length, not a sparse term list.
    return *safe_sqrt_sparse(f, n);
}
};
} // namespace suisen
// Free-function form of FormalPowerSeries::sqrt().
// @param a series, taken by value
// @return a.sqrt()
template <typename mint>
suisen::FormalPowerSeries<mint> sqrt(suisen::FormalPowerSeries<mint> a) {
    suisen::FormalPowerSeries<mint> result = a.sqrt();
    return result;
}
// Free-function form of FormalPowerSeries::log().
// @param a series, taken by value
// @return a.log()
template <typename mint>
suisen::FormalPowerSeries<mint> log(suisen::FormalPowerSeries<mint> a) {
    suisen::FormalPowerSeries<mint> result = a.log();
    return result;
}
// Free-function form of FormalPowerSeries::exp().
// @param a series, taken by value
// @return a.exp()
template <typename mint>
suisen::FormalPowerSeries<mint> exp(suisen::FormalPowerSeries<mint> a) {
    suisen::FormalPowerSeries<mint> result = a.exp();
    return result;
}
// Free-function form of FormalPowerSeries::pow().
// @param a series, taken by value
// @param b exponent, forwarded as-is to a.pow
// @return a.pow(b)
template <typename mint, typename T>
suisen::FormalPowerSeries<mint> pow(suisen::FormalPowerSeries<mint> a, T b) {
    suisen::FormalPowerSeries<mint> result = a.pow(b);
    return result;
}
// Free-function form of FormalPowerSeries::inv().
// @param a series, taken by value
// @return a.inv()
template <typename mint>
suisen::FormalPowerSeries<mint> inv(suisen::FormalPowerSeries<mint> a) {
    suisen::FormalPowerSeries<mint> result = a.inv();
    return result;
}
#line 8 "library/polynomial/compose_inv.hpp"
namespace suisen {
// Compositional inverse of a formal power series, computed in blocks of B = floor(sqrt(n)) + 1 coefficients.
// @param f leading coefficients of f (shorter input is zero-padded, longer input is truncated);
//          requires f[0] == 0 and, for n >= 2, at least two coefficients with f[1] != 0
// @param n number of coefficients of the result to compute
// @return g mod x^n s.t. f(g(x))=g(f(x))=x (mod x^n)
template <typename mint>
std::vector<mint> compositional_inv(std::vector<mint> f, const int n) {
    using fps = FormalPowerSeries<mint>;
    // Check only coefficients that are actually present: a caller passing just the first n
    // coefficients hands over fewer than two of them when n < 2.
    assert(f.empty() or f[0] == 0);
    assert(f.size() < 2 or f[1] != 0);
    if (n == 0) return {};
    if (n == 1) return { 0 };
    // From here on f[1] is required (and f.erase below needs a non-empty f).
    assert(f.size() >= 2);
    // f now holds f(x) / x, truncated or zero-padded to n coefficients.
    f.erase(f.begin());
    f.resize(n);
    const int B = ::sqrt(n) + 1;
    // Transform length: the smallest power of two that is at least 2n - 1.
    const int z = [n]{
        int len = 1;
        while (len < 2 * n - 1) len <<= 1;
        return len;
    }();
    const mint inv_z = mint(z).inv();
    f.resize(z);
    atcoder::internal::butterfly(f);
    // a -> a * (f(x) / x) mod x^n, reusing the transformed f
    auto mul_f = [&](std::vector<mint> a) {
        a.resize(z);
        atcoder::internal::butterfly(a);
        for (int j = 0; j < z; ++j) a[j] *= f[j] * inv_z;
        atcoder::internal::butterfly_inv(a);
        a.resize(n);
        return a;
    };
    // h[i] = (f(x) / x)^i mod x^n for i = 0..B
    std::vector<fps> h(B + 1, fps(n));
    h[0][0] = 1;
    for (int i = 1; i <= B; ++i) {
        h[i] = mul_f(h[i - 1]);
    }
    // h[B] is only needed as a multiplier from now on, so it is transformed in place.
    std::vector<mint>& hB = h[B];
    hB.resize(z);
    atcoder::internal::butterfly(hB);
    // a -> a * (f(x) / x)^B mod x^n
    auto mul_hB = [&](std::vector<mint> a) {
        a.resize(z);
        atcoder::internal::butterfly(a);
        for (int j = 0; j < z; ++j) a[j] *= hB[j] * inv_z;
        atcoder::internal::butterfly_inv(a);
        a.resize(n);
        return a;
    };
    fps H(n);
    H[0] = 1;
    fps g(B * B);
    fps I(B * B);
    I[1] = 1;
    for (int q = 0; q < B; ++q) {
        // loop invariant
        // - H = H_q = ((f(x) / x)^B)^q mod x^n, I = I_q
        // - g[0,qB) is calculated
        // J = (I_q / x^{qB}) / H_q mod x^B
        fps J = fps(I.begin() + q * B, I.begin() + (q * B + B)) * H.cut_copy(B).inv(B);
        J.resize(B);
        // Forward substitution: once processed, J[i] holds g[qB + i].
        for (int r = 0; r < B; ++r) {
            for (int i = 0; i < r; ++i) {
                J[r] -= J[i] * h[i][r - i];
            }
            J[r] /= h[r][0];
        }
        std::move(J.begin(), J.end(), g.begin() + (B * q));
        if (q == B - 1) break;
        // update I_q: subtract x^{qB} * H_q(x) * sum_r g[qB + r] * x^r * h_r(x)
        const int size_q = n - q * B;
        J.assign(size_q, 0);
        for (int r = 0; r < B; ++r) {
            const mint gr = g[q * B + r];
            for (int k = 0; k < size_q - r; ++k) {
                J[r + k] += gr * h[r][k];
            }
        }
        J *= H.cut_copy(size_q);
        for (int k = 0; k < size_q; ++k) {
            I[q * B + k] -= J[k];
        }
        // update H_q
        H = mul_hB(H);
    }
    g.resize(n);
    return g;
}
} // namespace suisen