#line 1 "verify/verify-yosupo-ntt/yosupo-multivariate-circular-convolution.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/multivariate_convolution_cyclic"
//
#line 2 "template/template.hpp"
using namespace std ;
// intrinstic
#include <immintrin.h>
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cfenv>
#include <cfloat>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <complex>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <fstream>
#include <functional>
#include <initializer_list>
#include <iomanip>
#include <ios>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ostream>
#include <queue>
#include <random>
#include <set>
#include <sstream>
#include <stack>
#include <streambuf>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
// utility
#line 3 "template/util.hpp"
namespace Nyaan {
using ll = long long ;
using i64 = long long ;
using u64 = unsigned long long ;
using i128 = __int128_t ;
using u128 = __uint128_t ;
template < typename T >
using V = vector < T > ;
template < typename T >
using VV = vector < vector < T >> ;
using vi = vector < int > ;
using vl = vector < long long > ;
using vd = V < double > ;
using vs = V < string > ;
using vvi = vector < vector < int >> ;
using vvl = vector < vector < long long >> ;
template < typename T >
using minpq = priority_queue < T , vector < T > , greater < T >> ;
template < typename T , typename U >
struct P : pair < T , U > {
template < typename ... Args >
P ( Args ... args ) : pair < T , U > ( args ...) {}
using pair < T , U >:: first ;
using pair < T , U >:: second ;
P & operator += ( const P & r ) {
first += r . first ;
second += r . second ;
return * this ;
}
P & operator -= ( const P & r ) {
first -= r . first ;
second -= r . second ;
return * this ;
}
P & operator *= ( const P & r ) {
first *= r . first ;
second *= r . second ;
return * this ;
}
template < typename S >
P & operator *= ( const S & r ) {
first *= r , second *= r ;
return * this ;
}
P operator + ( const P & r ) const { return P ( * this ) += r ; }
P operator - ( const P & r ) const { return P ( * this ) -= r ; }
P operator * ( const P & r ) const { return P ( * this ) *= r ; }
template < typename S >
P operator * ( const S & r ) const {
return P ( * this ) *= r ;
}
P operator - () const { return P { - first , - second }; }
};
using pl = P < ll , ll > ;
using pi = P < int , int > ;
using vp = V < pl > ;
constexpr int inf = 1001001001 ;
constexpr long long infLL = 4004004004004004004LL ;
template < typename T >
int sz ( const T & t ) {
return t . size ();
}
template < typename T , typename U >
inline bool amin ( T & x , U y ) {
return ( y < x ) ? ( x = y , true ) : false ;
}
template < typename T , typename U >
inline bool amax ( T & x , U y ) {
return ( x < y ) ? ( x = y , true ) : false ;
}
template < typename T >
inline T Max ( const vector < T > & v ) {
return * max_element ( begin ( v ), end ( v ));
}
template < typename T >
inline T Min ( const vector < T > & v ) {
return * min_element ( begin ( v ), end ( v ));
}
template < typename T >
inline long long Sum ( const vector < T > & v ) {
return accumulate ( begin ( v ), end ( v ), 0LL );
}
template < typename T >
int lb ( const vector < T > & v , const T & a ) {
return lower_bound ( begin ( v ), end ( v ), a ) - begin ( v );
}
template < typename T >
int ub ( const vector < T > & v , const T & a ) {
return upper_bound ( begin ( v ), end ( v ), a ) - begin ( v );
}
constexpr long long TEN ( int n ) {
long long ret = 1 , x = 10 ;
for (; n ; x *= x , n >>= 1 ) ret *= ( n & 1 ? x : 1 );
return ret ;
}
template < typename T , typename U >
pair < T , U > mkp ( const T & t , const U & u ) {
return make_pair ( t , u );
}
template < typename T >
vector < T > mkrui ( const vector < T > & v , bool rev = false ) {
vector < T > ret ( v . size () + 1 );
if ( rev ) {
for ( int i = int ( v . size ()) - 1 ; i >= 0 ; i -- ) ret [ i ] = v [ i ] + ret [ i + 1 ];
} else {
for ( int i = 0 ; i < int ( v . size ()); i ++ ) ret [ i + 1 ] = ret [ i ] + v [ i ];
}
return ret ;
};
template < typename T >
vector < T > mkuni ( const vector < T > & v ) {
vector < T > ret ( v );
sort ( ret . begin (), ret . end ());
ret . erase ( unique ( ret . begin (), ret . end ()), ret . end ());
return ret ;
}
template < typename F >
vector < int > mkord ( int N , F f ) {
vector < int > ord ( N );
iota ( begin ( ord ), end ( ord ), 0 );
sort ( begin ( ord ), end ( ord ), f );
return ord ;
}
template < typename T >
vector < int > mkinv ( vector < T > & v ) {
int max_val = * max_element ( begin ( v ), end ( v ));
vector < int > inv ( max_val + 1 , - 1 );
for ( int i = 0 ; i < ( int ) v . size (); i ++ ) inv [ v [ i ]] = i ;
return inv ;
}
vector < int > mkiota ( int n ) {
vector < int > ret ( n );
iota ( begin ( ret ), end ( ret ), 0 );
return ret ;
}
template < typename T >
T mkrev ( const T & v ) {
T w { v };
reverse ( begin ( w ), end ( w ));
return w ;
}
template < typename T >
bool nxp ( T & v ) {
return next_permutation ( begin ( v ), end ( v ));
}
// 返り値の型は入力の T に依存
// i 要素目 : [0, a[i])
template < typename T >
vector < vector < T >> product ( const vector < T > & a ) {
vector < vector < T >> ret ;
vector < T > v ;
auto dfs = [ & ]( auto rc , int i ) -> void {
if ( i == ( int ) a . size ()) {
ret . push_back ( v );
return ;
}
for ( int j = 0 ; j < a [ i ]; j ++ ) v . push_back ( j ), rc ( rc , i + 1 ), v . pop_back ();
};
dfs ( dfs , 0 );
return ret ;
}
// F : void(T&), mod を取る操作
// T : 整数型のときはオーバーフローに注意する
template < typename T , typename F >
T Power ( T a , long long n , const T & I , F && f ) {
static_assert ( std :: is_invocable_r_v < void , F & , T &> ,
"Power callback must be callable as void(T&)" );
T res = I ;
for (; n ; std :: invoke ( f , a = a * a ), n >>= 1 ) {
if ( n & 1 ) std :: invoke ( f , res = res * a );
}
return res ;
}
// T : 整数型のときはオーバーフローに注意する
template < typename T >
T Power ( T a , long long n , const T & I = T { 1 }) {
auto no_op = []( T & ) -> void {};
return Power ( a , n , I , no_op );
}
template < typename T >
T Rev ( const T & v ) {
T res = v ;
reverse ( begin ( res ), end ( res ));
return res ;
}
template < typename T >
vector < T > Transpose ( const vector < T > & v ) {
using U = typename T :: value_type ;
if ( v . empty ()) return {};
int H = v . size (), W = v [ 0 ]. size ();
vector res ( W , T ( H , U {}));
for ( int i = 0 ; i < H ; i ++ ) {
for ( int j = 0 ; j < W ; j ++ ) {
res [ j ][ i ] = v [ i ][ j ];
}
}
return res ;
}
template < typename T >
vector < T > Rotate ( const vector < T > & v , int clockwise = true ) {
using U = typename T :: value_type ;
int H = v . size (), W = v [ 0 ]. size ();
vector res ( W , T ( H , U {}));
for ( int i = 0 ; i < H ; i ++ ) {
for ( int j = 0 ; j < W ; j ++ ) {
if ( clockwise ) {
res [ W - 1 - j ][ i ] = v [ i ][ j ];
} else {
res [ j ][ H - 1 - i ] = v [ i ][ j ];
}
}
}
return res ;
}
} // namespace Nyaan
#line 58 "template/template.hpp"
// bit operation
#line 1 "template/bitop.hpp"
namespace Nyaan {
__attribute__ (( target ( "popcnt" ))) inline int popcnt ( const u64 & a ) {
return __builtin_popcountll ( a );
}
inline int lsb ( const u64 & a ) { return a ? __builtin_ctzll ( a ) : 64 ; }
inline int ctz ( const u64 & a ) { return a ? __builtin_ctzll ( a ) : 64 ; }
inline int msb ( const u64 & a ) { return a ? 63 - __builtin_clzll ( a ) : - 1 ; }
template < typename T >
inline int gbit ( const T & a , int i ) {
return ( a >> i ) & 1 ;
}
template < typename T >
inline void sbit ( T & a , int i , bool b ) {
if ( gbit ( a , i ) != b ) a ^= T ( 1 ) << i ;
}
constexpr long long PW ( int n ) { return 1LL << n ; }
constexpr long long MSK ( int n ) { return ( 1LL << n ) - 1 ; }
} // namespace Nyaan
#line 61 "template/template.hpp"
// inout
#line 1 "template/inout.hpp"
namespace Nyaan {
template < typename T , typename U >
ostream & operator << ( ostream & os , const pair < T , U > & p ) {
os << p . first << " " << p . second ;
return os ;
}
template < typename T , typename U >
istream & operator >> ( istream & is , pair < T , U > & p ) {
is >> p . first >> p . second ;
return is ;
}
template < typename T >
ostream & operator << ( ostream & os , const vector < T > & v ) {
int s = ( int ) v . size ();
for ( int i = 0 ; i < s ; i ++ ) os << ( i ? " " : "" ) << v [ i ];
return os ;
}
template < typename T >
istream & operator >> ( istream & is , vector < T > & v ) {
for ( auto & x : v ) is >> x ;
return is ;
}
istream & operator >> ( istream & is , __int128_t & x ) {
string S ;
is >> S ;
x = 0 ;
int flag = 0 ;
for ( auto & c : S ) {
if ( c == '-' ) {
flag = true ;
continue ;
}
x *= 10 ;
x += c - '0' ;
}
if ( flag ) x = - x ;
return is ;
}
istream & operator >> ( istream & is , __uint128_t & x ) {
string S ;
is >> S ;
x = 0 ;
for ( auto & c : S ) {
x *= 10 ;
x += c - '0' ;
}
return is ;
}
ostream & operator << ( ostream & os , __int128_t x ) {
if ( x == 0 ) return os << 0 ;
if ( x < 0 ) os << '-' , x = - x ;
string S ;
while ( x ) S . push_back ( '0' + x % 10 ), x /= 10 ;
reverse ( begin ( S ), end ( S ));
return os << S ;
}
ostream & operator << ( ostream & os , __uint128_t x ) {
if ( x == 0 ) return os << 0 ;
string S ;
while ( x ) S . push_back ( '0' + x % 10 ), x /= 10 ;
reverse ( begin ( S ), end ( S ));
return os << S ;
}
void in () {}
template < typename T , class ... U >
void in ( T & t , U & ... u ) {
cin >> t ;
in ( u ...);
}
void out () { cout << " \n " ; }
template < typename T , class ... U , char sep = ' ' >
void out ( const T & t , const U & ... u ) {
cout << t ;
if ( sizeof ...( u )) cout << sep ;
out ( u ...);
}
struct IoSetupNya {
IoSetupNya () {
cin . tie ( nullptr );
ios :: sync_with_stdio ( false );
cout << fixed << setprecision ( 15 );
cerr << fixed << setprecision ( 7 );
}
} iosetupnya ;
} // namespace Nyaan
#line 64 "template/template.hpp"
// debug
#line 1 "template/debug.hpp"
namespace DebugImpl {
template < typename U , typename = void >
struct is_specialize : false_type {};
template < typename U >
struct is_specialize <
U , typename conditional < false , typename U :: iterator , void >:: type >
: true_type {};
template < typename U >
struct is_specialize <
U , typename conditional < false , decltype ( U :: first ), void >:: type >
: true_type {};
template < typename U >
struct is_specialize < U , enable_if_t < is_integral < U >:: value , void >> : true_type {
};
void dump ( const char & t ) { cerr << t ; }
void dump ( const string & t ) { cerr << t ; }
void dump ( const bool & t ) { cerr << ( t ? "true" : "false" ); }
void dump ( __int128_t t ) {
if ( t == 0 ) cerr << 0 ;
if ( t < 0 ) cerr << '-' , t = - t ;
string S ;
while ( t ) S . push_back ( '0' + t % 10 ), t /= 10 ;
reverse ( begin ( S ), end ( S ));
cerr << S ;
}
void dump ( __uint128_t t ) {
if ( t == 0 ) cerr << 0 ;
string S ;
while ( t ) S . push_back ( '0' + t % 10 ), t /= 10 ;
reverse ( begin ( S ), end ( S ));
cerr << S ;
}
template < typename U ,
enable_if_t <! is_specialize < U > :: value , nullptr_t > = nullptr >
void dump ( const U & t ) {
cerr << t ;
}
template < typename T >
void dump ( const T & t , enable_if_t < is_integral < T >:: value >* = nullptr ) {
string res ;
if ( t == Nyaan :: inf ) res = "inf" ;
if constexpr ( is_signed < T >:: value ) {
if ( t == - Nyaan :: inf ) res = "-inf" ;
}
if constexpr ( sizeof ( T ) == 8 ) {
if ( t == Nyaan :: infLL ) res = "inf" ;
if constexpr ( is_signed < T >:: value ) {
if ( t == - Nyaan :: infLL ) res = "-inf" ;
}
}
if ( res . empty ()) res = to_string ( t );
cerr << res ;
}
template < typename T , typename U >
void dump ( const pair < T , U >& );
template < typename T >
void dump ( const pair < T * , int >& );
template < typename T >
void dump ( const T & t ,
enable_if_t <! is_void < typename T :: iterator >:: value >* = nullptr ) {
cerr << "[ " ;
for ( auto it = t . begin (); it != t . end ();) {
dump ( * it );
cerr << ( ++ it == t . end () ? "" : ", " );
}
cerr << " ]" ;
}
template < typename T , typename U >
void dump ( const pair < T , U >& t ) {
cerr << "( " ;
dump ( t . first );
cerr << ", " ;
dump ( t . second );
cerr << " )" ;
}
template < typename T >
void dump ( const pair < T * , int >& t ) {
cerr << "[ " ;
for ( int i = 0 ; i < t . second ; i ++ ) {
dump ( t . first [ i ]);
cerr << ( i == t . second - 1 ? "" : ", " );
}
cerr << " ]" ;
}
void trace () { cerr << endl ; }
template < typename Head , typename ... Tail >
void trace ( Head && head , Tail && ... tail ) {
cerr << " " ;
dump ( head );
if ( sizeof ...( tail ) != 0 ) cerr << "," ;
trace ( std :: forward < Tail > ( tail )...);
}
} // namespace DebugImpl
#ifdef NyaanDebug
#define trc(...) \
do { \
cerr << "## " << #__VA_ARGS__ << " = "; \
DebugImpl::trace(__VA_ARGS__); \
} while (0)
#else
#define trc(...) (void(0))
#endif
#ifdef NyaanLocal
#define trc2(...) \
do { \
cerr << "## " << #__VA_ARGS__ << " = "; \
DebugImpl::trace(__VA_ARGS__); \
} while (0)
#else
#define trc2(...) (void(0))
#endif
#line 67 "template/template.hpp"
// macro
#line 1 "template/macro.hpp"
#define each(x, v) for (auto&& x : v)
#define each2(x, y, v) for (auto&& [x, y] : v)
#define all(v) (v).begin(), (v).end()
#define rep(i, N) for (long long i = 0; i < (long long)(N); i++)
#define repr(i, N) for (long long i = (long long)(N)-1; i >= 0; i--)
#define rep1(i, N) for (long long i = 1; i <= (long long)(N); i++)
#define repr1(i, N) for (long long i = (N); (long long)(i) > 0; i--)
#define reg(i, a, b) for (long long i = (a); i < (b); i++)
#define regr(i, a, b) for (long long i = (b)-1; i >= (a); i--)
#define fi first
#define se second
#define ini(...) \
int __VA_ARGS__; \
in(__VA_ARGS__)
#define inl(...) \
long long __VA_ARGS__; \
in(__VA_ARGS__)
#define ins(...) \
string __VA_ARGS__; \
in(__VA_ARGS__)
#define in2(s, t) \
for (int i = 0; i < (int)s.size(); i++) { \
in(s[i], t[i]); \
}
#define in3(s, t, u) \
for (int i = 0; i < (int)s.size(); i++) { \
in(s[i], t[i], u[i]); \
}
#define in4(s, t, u, v) \
for (int i = 0; i < (int)s.size(); i++) { \
in(s[i], t[i], u[i], v[i]); \
}
#define die(...) \
do { \
Nyaan::out(__VA_ARGS__); \
return; \
} while (0)
#line 70 "template/template.hpp"
namespace Nyaan {
void solve ();
}
int main () { Nyaan :: solve (); }
#line 4 "verify/verify-yosupo-ntt/yosupo-multivariate-circular-convolution.test.cpp"
//
#line 2 "ntt/multivariate-circular-convolution.hpp"
//
#line 6 "ntt/multivariate-circular-convolution.hpp"
using namespace std ;
#line 2 "fps/arbitrary-fps.hpp"
#line 4 "fps/arbitrary-fps.hpp"
#line 2 "ntt/arbitrary-ntt.hpp"
#line 6 "ntt/arbitrary-ntt.hpp"
using namespace std ;
#line 2 "modint/montgomery-modint.hpp"
#line 5 "modint/montgomery-modint.hpp"
template < uint32_t mod >
struct LazyMontgomeryModInt {
using mint = LazyMontgomeryModInt ;
using i32 = int32_t ;
using u32 = uint32_t ;
using u64 = uint64_t ;
static constexpr u32 get_r () {
u32 ret = mod ;
for ( i32 i = 0 ; i < 4 ; ++ i ) ret *= 2 - mod * ret ;
return ret ;
}
static constexpr u32 r = get_r ();
static constexpr u32 n2 = - u64 ( mod ) % mod ;
static_assert ( mod < ( 1 << 30 ), "invalid, mod >= 2 ^ 30" );
static_assert (( mod & 1 ) == 1 , "invalid, mod % 2 == 0" );
static_assert ( r * mod == 1 , "this code has bugs." );
u32 a ;
constexpr LazyMontgomeryModInt () : a ( 0 ) {}
constexpr LazyMontgomeryModInt ( const int64_t & b )
: a ( reduce ( u64 ( b % mod + mod ) * n2 )){};
static constexpr u32 reduce ( const u64 & b ) {
return ( b + u64 ( u32 ( b ) * u32 ( - r )) * mod ) >> 32 ;
}
constexpr mint & operator += ( const mint & b ) {
if ( i32 ( a += b . a - 2 * mod ) < 0 ) a += 2 * mod ;
return * this ;
}
constexpr mint & operator -= ( const mint & b ) {
if ( i32 ( a -= b . a ) < 0 ) a += 2 * mod ;
return * this ;
}
constexpr mint & operator *= ( const mint & b ) {
a = reduce ( u64 ( a ) * b . a );
return * this ;
}
constexpr mint & operator /= ( const mint & b ) {
* this *= b . inverse ();
return * this ;
}
constexpr mint operator + ( const mint & b ) const { return mint ( * this ) += b ; }
constexpr mint operator - ( const mint & b ) const { return mint ( * this ) -= b ; }
constexpr mint operator * ( const mint & b ) const { return mint ( * this ) *= b ; }
constexpr mint operator / ( const mint & b ) const { return mint ( * this ) /= b ; }
constexpr bool operator == ( const mint & b ) const {
return ( a >= mod ? a - mod : a ) == ( b . a >= mod ? b . a - mod : b . a );
}
constexpr bool operator != ( const mint & b ) const {
return ( a >= mod ? a - mod : a ) != ( b . a >= mod ? b . a - mod : b . a );
}
constexpr mint operator - () const { return mint () - mint ( * this ); }
constexpr mint operator + () const { return mint ( * this ); }
constexpr mint pow ( u64 n ) const {
mint ret ( 1 ), mul ( * this );
while ( n > 0 ) {
if ( n & 1 ) ret *= mul ;
mul *= mul ;
n >>= 1 ;
}
return ret ;
}
constexpr mint inverse () const {
int x = get (), y = mod , u = 1 , v = 0 , t = 0 , tmp = 0 ;
while ( y > 0 ) {
t = x / y ;
x -= t * y , u -= t * v ;
tmp = x , x = y , y = tmp ;
tmp = u , u = v , v = tmp ;
}
return mint { u };
}
friend std :: ostream & operator << ( std :: ostream & os , const mint & b ) {
return os << b . get ();
}
friend std :: istream & operator >> ( std :: istream & is , mint & b ) {
int64_t t ;
is >> t ;
b = LazyMontgomeryModInt < mod > ( t );
return ( is );
}
constexpr u32 get () const {
u32 ret = reduce ( a );
return ret >= mod ? ret - mod : ret ;
}
static constexpr u32 get_mod () { return mod ; }
};
#line 2 "ntt/ntt.hpp"
#line 7 "ntt/ntt.hpp"
using namespace std ;
template < typename mint >
struct NTT {
static constexpr uint32_t get_pr () {
uint32_t _mod = mint :: get_mod ();
using u64 = uint64_t ;
u64 ds [ 32 ] = {};
int idx = 0 ;
u64 m = _mod - 1 ;
for ( u64 i = 2 ; i * i <= m ; ++ i ) {
if ( m % i == 0 ) {
ds [ idx ++ ] = i ;
while ( m % i == 0 ) m /= i ;
}
}
if ( m != 1 ) ds [ idx ++ ] = m ;
uint32_t _pr = 2 ;
while ( 1 ) {
int flg = 1 ;
for ( int i = 0 ; i < idx ; ++ i ) {
u64 a = _pr , b = ( _mod - 1 ) / ds [ i ], r = 1 ;
while ( b ) {
if ( b & 1 ) r = r * a % _mod ;
a = a * a % _mod ;
b >>= 1 ;
}
if ( r == 1 ) {
flg = 0 ;
break ;
}
}
if ( flg == 1 ) break ;
++ _pr ;
}
return _pr ;
};
static constexpr uint32_t mod = mint :: get_mod ();
static constexpr uint32_t pr = get_pr ();
static constexpr int level = __builtin_ctzll ( mod - 1 );
mint dw [ level ], dy [ level ];
void setwy ( int k ) {
mint w [ level ], y [ level ];
w [ k - 1 ] = mint ( pr ). pow (( mod - 1 ) / ( 1 << k ));
y [ k - 1 ] = w [ k - 1 ]. inverse ();
for ( int i = k - 2 ; i > 0 ; -- i )
w [ i ] = w [ i + 1 ] * w [ i + 1 ], y [ i ] = y [ i + 1 ] * y [ i + 1 ];
dw [ 1 ] = w [ 1 ], dy [ 1 ] = y [ 1 ], dw [ 2 ] = w [ 2 ], dy [ 2 ] = y [ 2 ];
for ( int i = 3 ; i < k ; ++ i ) {
dw [ i ] = dw [ i - 1 ] * y [ i - 2 ] * w [ i ];
dy [ i ] = dy [ i - 1 ] * w [ i - 2 ] * y [ i ];
}
}
NTT () { setwy ( level ); }
void fft4 ( vector < mint > & a , int k ) {
if (( int ) a . size () <= 1 ) return ;
if ( k == 1 ) {
mint a1 = a [ 1 ];
a [ 1 ] = a [ 0 ] - a [ 1 ];
a [ 0 ] = a [ 0 ] + a1 ;
return ;
}
if ( k & 1 ) {
int v = 1 << ( k - 1 );
for ( int j = 0 ; j < v ; ++ j ) {
mint ajv = a [ j + v ];
a [ j + v ] = a [ j ] - ajv ;
a [ j ] += ajv ;
}
}
int u = 1 << ( 2 + ( k & 1 ));
int v = 1 << ( k - 2 - ( k & 1 ));
mint one = mint ( 1 );
mint imag = dw [ 1 ];
while ( v ) {
// jh = 0
{
int j0 = 0 ;
int j1 = v ;
int j2 = j1 + v ;
int j3 = j2 + v ;
for (; j0 < v ; ++ j0 , ++ j1 , ++ j2 , ++ j3 ) {
mint t0 = a [ j0 ], t1 = a [ j1 ], t2 = a [ j2 ], t3 = a [ j3 ];
mint t0p2 = t0 + t2 , t1p3 = t1 + t3 ;
mint t0m2 = t0 - t2 , t1m3 = ( t1 - t3 ) * imag ;
a [ j0 ] = t0p2 + t1p3 , a [ j1 ] = t0p2 - t1p3 ;
a [ j2 ] = t0m2 + t1m3 , a [ j3 ] = t0m2 - t1m3 ;
}
}
// jh >= 1
mint ww = one , xx = one * dw [ 2 ], wx = one ;
for ( int jh = 4 ; jh < u ;) {
ww = xx * xx , wx = ww * xx ;
int j0 = jh * v ;
int je = j0 + v ;
int j2 = je + v ;
for (; j0 < je ; ++ j0 , ++ j2 ) {
mint t0 = a [ j0 ], t1 = a [ j0 + v ] * xx , t2 = a [ j2 ] * ww ,
t3 = a [ j2 + v ] * wx ;
mint t0p2 = t0 + t2 , t1p3 = t1 + t3 ;
mint t0m2 = t0 - t2 , t1m3 = ( t1 - t3 ) * imag ;
a [ j0 ] = t0p2 + t1p3 , a [ j0 + v ] = t0p2 - t1p3 ;
a [ j2 ] = t0m2 + t1m3 , a [ j2 + v ] = t0m2 - t1m3 ;
}
xx *= dw [ __builtin_ctzll (( jh += 4 ))];
}
u <<= 2 ;
v >>= 2 ;
}
}
void ifft4 ( vector < mint > & a , int k ) {
if (( int ) a . size () <= 1 ) return ;
if ( k == 1 ) {
mint a1 = a [ 1 ];
a [ 1 ] = a [ 0 ] - a [ 1 ];
a [ 0 ] = a [ 0 ] + a1 ;
return ;
}
int u = 1 << ( k - 2 );
int v = 1 ;
mint one = mint ( 1 );
mint imag = dy [ 1 ];
while ( u ) {
// jh = 0
{
int j0 = 0 ;
int j1 = v ;
int j2 = v + v ;
int j3 = j2 + v ;
for (; j0 < v ; ++ j0 , ++ j1 , ++ j2 , ++ j3 ) {
mint t0 = a [ j0 ], t1 = a [ j1 ], t2 = a [ j2 ], t3 = a [ j3 ];
mint t0p1 = t0 + t1 , t2p3 = t2 + t3 ;
mint t0m1 = t0 - t1 , t2m3 = ( t2 - t3 ) * imag ;
a [ j0 ] = t0p1 + t2p3 , a [ j2 ] = t0p1 - t2p3 ;
a [ j1 ] = t0m1 + t2m3 , a [ j3 ] = t0m1 - t2m3 ;
}
}
// jh >= 1
mint ww = one , xx = one * dy [ 2 ], yy = one ;
u <<= 2 ;
for ( int jh = 4 ; jh < u ;) {
ww = xx * xx , yy = xx * imag ;
int j0 = jh * v ;
int je = j0 + v ;
int j2 = je + v ;
for (; j0 < je ; ++ j0 , ++ j2 ) {
mint t0 = a [ j0 ], t1 = a [ j0 + v ], t2 = a [ j2 ], t3 = a [ j2 + v ];
mint t0p1 = t0 + t1 , t2p3 = t2 + t3 ;
mint t0m1 = ( t0 - t1 ) * xx , t2m3 = ( t2 - t3 ) * yy ;
a [ j0 ] = t0p1 + t2p3 , a [ j2 ] = ( t0p1 - t2p3 ) * ww ;
a [ j0 + v ] = t0m1 + t2m3 , a [ j2 + v ] = ( t0m1 - t2m3 ) * ww ;
}
xx *= dy [ __builtin_ctzll ( jh += 4 )];
}
u >>= 4 ;
v <<= 2 ;
}
if ( k & 1 ) {
u = 1 << ( k - 1 );
for ( int j = 0 ; j < u ; ++ j ) {
mint ajv = a [ j ] - a [ j + u ];
a [ j ] += a [ j + u ];
a [ j + u ] = ajv ;
}
}
}
void ntt ( vector < mint > & a ) {
if (( int ) a . size () <= 1 ) return ;
fft4 ( a , __builtin_ctz ( a . size ()));
}
void intt ( vector < mint > & a ) {
if (( int ) a . size () <= 1 ) return ;
ifft4 ( a , __builtin_ctz ( a . size ()));
mint iv = mint ( a . size ()). inverse ();
for ( auto & x : a ) x *= iv ;
}
vector < mint > multiply ( const vector < mint > & a , const vector < mint > & b ) {
int l = a . size () + b . size () - 1 ;
if ( min < int > ( a . size (), b . size ()) <= 40 ) {
vector < mint > s ( l );
for ( int i = 0 ; i < ( int ) a . size (); ++ i )
for ( int j = 0 ; j < ( int ) b . size (); ++ j ) s [ i + j ] += a [ i ] * b [ j ];
return s ;
}
int k = 2 , M = 4 ;
while ( M < l ) M <<= 1 , ++ k ;
setwy ( k );
vector < mint > s ( M );
for ( int i = 0 ; i < ( int ) a . size (); ++ i ) s [ i ] = a [ i ];
fft4 ( s , k );
if ( a . size () == b . size () && a == b ) {
for ( int i = 0 ; i < M ; ++ i ) s [ i ] *= s [ i ];
} else {
vector < mint > t ( M );
for ( int i = 0 ; i < ( int ) b . size (); ++ i ) t [ i ] = b [ i ];
fft4 ( t , k );
for ( int i = 0 ; i < M ; ++ i ) s [ i ] *= t [ i ];
}
ifft4 ( s , k );
s . resize ( l );
mint invm = mint ( M ). inverse ();
for ( int i = 0 ; i < l ; ++ i ) s [ i ] *= invm ;
return s ;
}
void ntt_doubling ( vector < mint > & a ) {
int M = ( int ) a . size ();
auto b = a ;
intt ( b );
mint r = 1 , zeta = mint ( pr ). pow (( mint :: get_mod () - 1 ) / ( M << 1 ));
for ( int i = 0 ; i < M ; i ++ ) b [ i ] *= r , r *= zeta ;
ntt ( b );
copy ( begin ( b ), end ( b ), back_inserter ( a ));
}
};
#line 10 "ntt/arbitrary-ntt.hpp"
namespace ArbitraryNTT {
using i64 = int64_t ;
using u128 = __uint128_t ;
constexpr int32_t m0 = 167772161 ;
constexpr int32_t m1 = 469762049 ;
constexpr int32_t m2 = 754974721 ;
using mint0 = LazyMontgomeryModInt < m0 > ;
using mint1 = LazyMontgomeryModInt < m1 > ;
using mint2 = LazyMontgomeryModInt < m2 > ;
constexpr int r01 = mint1 ( m0 ). inverse (). get ();
constexpr int r02 = mint2 ( m0 ). inverse (). get ();
constexpr int r12 = mint2 ( m1 ). inverse (). get ();
constexpr int r02r12 = i64 ( r02 ) * r12 % m2 ;
constexpr i64 w1 = m0 ;
constexpr i64 w2 = i64 ( m0 ) * m1 ;
template < typename T , typename submint >
vector < submint > mul ( const vector < T > & a , const vector < T > & b ) {
static NTT < submint > ntt ;
vector < submint > s ( a . size ()), t ( b . size ());
for ( int i = 0 ; i < ( int ) a . size (); ++ i ) s [ i ] = i64 ( a [ i ] % submint :: get_mod ());
for ( int i = 0 ; i < ( int ) b . size (); ++ i ) t [ i ] = i64 ( b [ i ] % submint :: get_mod ());
return ntt . multiply ( s , t );
}
template < typename T >
vector < int > multiply ( const vector < T > & s , const vector < T > & t , int mod ) {
auto d0 = mul < T , mint0 > ( s , t );
auto d1 = mul < T , mint1 > ( s , t );
auto d2 = mul < T , mint2 > ( s , t );
int n = d0 . size ();
vector < int > ret ( n );
const int W1 = w1 % mod ;
const int W2 = w2 % mod ;
for ( int i = 0 ; i < n ; i ++ ) {
int n1 = d1 [ i ]. get (), n2 = d2 [ i ]. get (), a = d0 [ i ]. get ();
int b = i64 ( n1 + m1 - a ) * r01 % m1 ;
int c = ( i64 ( n2 + m2 - a ) * r02r12 + i64 ( m2 - b ) * r12 ) % m2 ;
ret [ i ] = ( i64 ( a ) + i64 ( b ) * W1 + i64 ( c ) * W2 ) % mod ;
}
return ret ;
}
template < typename mint >
vector < mint > multiply ( const vector < mint > & a , const vector < mint > & b ) {
if ( a . size () == 0 && b . size () == 0 ) return {};
if ( min < int > ( a . size (), b . size ()) < 128 ) {
vector < mint > ret ( a . size () + b . size () - 1 );
for ( int i = 0 ; i < ( int ) a . size (); ++ i )
for ( int j = 0 ; j < ( int ) b . size (); ++ j ) ret [ i + j ] += a [ i ] * b [ j ];
return ret ;
}
vector < int > s ( a . size ()), t ( b . size ());
for ( int i = 0 ; i < ( int ) a . size (); ++ i ) s [ i ] = a [ i ]. get ();
for ( int i = 0 ; i < ( int ) b . size (); ++ i ) t [ i ] = b [ i ]. get ();
vector < int > u = multiply < int > ( s , t , mint :: get_mod ());
vector < mint > ret ( u . size ());
for ( int i = 0 ; i < ( int ) u . size (); ++ i ) ret [ i ] = mint ( u [ i ]);
return ret ;
}
template < typename T >
vector < u128 > multiply_u128 ( const vector < T > & s , const vector < T > & t ) {
if ( s . size () == 0 && t . size () == 0 ) return {};
if ( min < int > ( s . size (), t . size ()) < 128 ) {
vector < u128 > ret ( s . size () + t . size () - 1 );
for ( int i = 0 ; i < ( int ) s . size (); ++ i )
for ( int j = 0 ; j < ( int ) t . size (); ++ j ) ret [ i + j ] += i64 ( s [ i ]) * t [ j ];
return ret ;
}
auto d0 = mul < T , mint0 > ( s , t );
auto d1 = mul < T , mint1 > ( s , t );
auto d2 = mul < T , mint2 > ( s , t );
int n = d0 . size ();
vector < u128 > ret ( n );
for ( int i = 0 ; i < n ; i ++ ) {
i64 n1 = d1 [ i ]. get (), n2 = d2 [ i ]. get ();
i64 a = d0 [ i ]. get ();
i64 b = ( n1 + m1 - a ) * r01 % m1 ;
i64 c = (( n2 + m2 - a ) * r02r12 + ( m2 - b ) * r12 ) % m2 ;
ret [ i ] = a + b * w1 + u128 ( c ) * w2 ;
}
return ret ;
}
} // namespace ArbitraryNTT
#line 2 "fps/formal-power-series.hpp"
#line 8 "fps/formal-power-series.hpp"
using namespace std ;
template < typename mint >
struct FormalPowerSeries : vector < mint > {
using vector < mint >:: vector ;
using FPS = FormalPowerSeries ;
FPS & operator += ( const FPS & r ) {
if ( r . size () > this -> size ()) this -> resize ( r . size ());
for ( int i = 0 ; i < ( int ) r . size (); i ++ ) ( * this )[ i ] += r [ i ];
return * this ;
}
FPS & operator += ( const mint & r ) {
if ( this -> empty ()) this -> resize ( 1 );
( * this )[ 0 ] += r ;
return * this ;
}
FPS & operator -= ( const FPS & r ) {
if ( r . size () > this -> size ()) this -> resize ( r . size ());
for ( int i = 0 ; i < ( int ) r . size (); i ++ ) ( * this )[ i ] -= r [ i ];
return * this ;
}
FPS & operator -= ( const mint & r ) {
if ( this -> empty ()) this -> resize ( 1 );
( * this )[ 0 ] -= r ;
return * this ;
}
FPS & operator *= ( const mint & v ) {
for ( int k = 0 ; k < ( int ) this -> size (); k ++ ) ( * this )[ k ] *= v ;
return * this ;
}
FPS & operator /= ( const FPS & r ) {
if ( this -> size () < r . size ()) {
this -> clear ();
return * this ;
}
int n = this -> size () - r . size () + 1 ;
if (( int ) r . size () <= 64 ) {
FPS f ( * this ), g ( r );
g . shrink ();
mint coeff = g . back (). inverse ();
for ( auto & x : g ) x *= coeff ;
int deg = ( int ) f . size () - ( int ) g . size () + 1 ;
int gs = g . size ();
FPS quo ( deg );
for ( int i = deg - 1 ; i >= 0 ; i -- ) {
quo [ i ] = f [ i + gs - 1 ];
for ( int j = 0 ; j < gs ; j ++ ) f [ i + j ] -= quo [ i ] * g [ j ];
}
* this = quo * coeff ;
this -> resize ( n , mint ( 0 ));
return * this ;
}
return * this = (( * this ). rev (). pre ( n ) * r . rev (). inv ( n )). pre ( n ). rev ();
}
FPS & operator %= ( const FPS & r ) {
* this -= * this / r * r ;
shrink ();
return * this ;
}
FPS operator + ( const FPS & r ) const { return FPS ( * this ) += r ; }
FPS operator + ( const mint & v ) const { return FPS ( * this ) += v ; }
FPS operator - ( const FPS & r ) const { return FPS ( * this ) -= r ; }
FPS operator - ( const mint & v ) const { return FPS ( * this ) -= v ; }
FPS operator * ( const FPS & r ) const { return FPS ( * this ) *= r ; }
FPS operator * ( const mint & v ) const { return FPS ( * this ) *= v ; }
FPS operator / ( const FPS & r ) const { return FPS ( * this ) /= r ; }
FPS operator % ( const FPS & r ) const { return FPS ( * this ) %= r ; }
FPS operator - () const {
FPS ret ( this -> size ());
for ( int i = 0 ; i < ( int ) this -> size (); i ++ ) ret [ i ] = - ( * this )[ i ];
return ret ;
}
void shrink () {
while ( this -> size () && this -> back () == mint ( 0 )) this -> pop_back ();
}
FPS rev () const {
FPS ret ( * this );
reverse ( begin ( ret ), end ( ret ));
return ret ;
}
FPS dot ( FPS r ) const {
FPS ret ( min ( this -> size (), r . size ()));
for ( int i = 0 ; i < ( int ) ret . size (); i ++ ) ret [ i ] = ( * this )[ i ] * r [ i ];
return ret ;
}
// 前 sz 項を取ってくる。sz に足りない項は 0 埋めする
FPS pre ( int sz ) const {
FPS ret ( begin ( * this ), begin ( * this ) + min (( int ) this -> size (), sz ));
if (( int ) ret . size () < sz ) ret . resize ( sz );
return ret ;
}
FPS operator >> ( int sz ) const {
if (( int ) this -> size () <= sz ) return {};
FPS ret ( * this );
ret . erase ( ret . begin (), ret . begin () + sz );
return ret ;
}
FPS operator << ( int sz ) const {
FPS ret ( * this );
ret . insert ( ret . begin (), sz , mint ( 0 ));
return ret ;
}
FPS diff () const {
const int n = ( int ) this -> size ();
FPS ret ( max ( 0 , n - 1 ));
mint one ( 1 ), coeff ( 1 );
for ( int i = 1 ; i < n ; i ++ ) {
ret [ i - 1 ] = ( * this )[ i ] * coeff ;
coeff += one ;
}
return ret ;
}
FPS integral () const {
const int n = ( int ) this -> size ();
FPS ret ( n + 1 );
ret [ 0 ] = mint ( 0 );
if ( n > 0 ) ret [ 1 ] = mint ( 1 );
auto mod = mint :: get_mod ();
for ( int i = 2 ; i <= n ; i ++ ) ret [ i ] = ( - ret [ mod % i ]) * ( mod / i );
for ( int i = 0 ; i < n ; i ++ ) ret [ i + 1 ] *= ( * this )[ i ];
return ret ;
}
mint eval ( mint x ) const {
mint r = 0 , w = 1 ;
for ( auto & v : * this ) r += w * v , w *= x ;
return r ;
}
FPS log ( int deg = - 1 ) const {
assert ( ! ( * this ). empty () && ( * this )[ 0 ] == mint ( 1 ));
if ( deg == - 1 ) deg = ( int ) this -> size ();
return ( this -> diff () * this -> inv ( deg )). pre ( deg - 1 ). integral ();
}
FPS pow ( int64_t k , int deg = - 1 ) const {
const int n = ( int ) this -> size ();
if ( deg == - 1 ) deg = n ;
if ( k == 0 ) {
FPS ret ( deg );
if ( deg ) ret [ 0 ] = 1 ;
return ret ;
}
for ( int i = 0 ; i < n ; i ++ ) {
if (( * this )[ i ] != mint ( 0 )) {
mint rev = mint ( 1 ) / ( * this )[ i ];
FPS ret = ((( * this * rev ) >> i ). log ( deg ) * k ). exp ( deg );
ret *= ( * this )[ i ]. pow ( k );
ret = ( ret << ( i * k )). pre ( deg );
if (( int ) ret . size () < deg ) ret . resize ( deg , mint ( 0 ));
return ret ;
}
if ( __int128_t ( i + 1 ) * k >= deg ) return FPS ( deg , mint ( 0 ));
}
return FPS ( deg , mint ( 0 ));
}
static void * ntt_ptr ;
static void set_fft ();
FPS & operator *= ( const FPS & r );
void ntt ();
void intt ();
void ntt_doubling ();
static int ntt_pr ();
FPS inv ( int deg = - 1 ) const ;
FPS exp ( int deg = - 1 ) const ;
};
template < typename mint >
void * FormalPowerSeries < mint >:: ntt_ptr = nullptr ;
template < int N >
struct FPSBackendPriority : FPSBackendPriority < N - 1 > {};
template < >
struct FPSBackendPriority < 0 > {};
template < typename mint >
void FormalPowerSeries < mint >:: set_fft () {
fps_set_fft_impl (( FormalPowerSeries < mint >* ) nullptr , FPSBackendPriority < 1 > {});
}
template < typename mint >
FormalPowerSeries < mint >& FormalPowerSeries < mint >:: operator *= ( const FPS & r ) {
if ( this -> empty () || r . empty ()) {
this -> clear ();
return * this ;
}
return fps_multiply_impl ( * this , r , FPSBackendPriority < 1 > {});
}
template < typename mint >
void FormalPowerSeries < mint >:: ntt () {
fps_ntt_impl ( * this , FPSBackendPriority < 1 > {});
}
template < typename mint >
void FormalPowerSeries < mint >:: intt () {
fps_intt_impl ( * this , FPSBackendPriority < 1 > {});
}
template < typename mint >
void FormalPowerSeries < mint >:: ntt_doubling () {
fps_ntt_doubling_impl ( * this , FPSBackendPriority < 1 > {});
}
template < typename mint >
int FormalPowerSeries < mint >:: ntt_pr () {
return fps_ntt_pr_impl (( FormalPowerSeries < mint >* ) nullptr ,
FPSBackendPriority < 1 > {});
}
template < typename mint >
FormalPowerSeries < mint > FormalPowerSeries < mint >:: inv ( int deg ) const {
return fps_inv_impl ( * this , deg , FPSBackendPriority < 1 > {});
}
template < typename mint >
FormalPowerSeries < mint > FormalPowerSeries < mint >:: exp ( int deg ) const {
return fps_exp_impl ( * this , deg , FPSBackendPriority < 1 > {});
}
/**
* @brief 多項式/形式的冪級数ライブラリ
*/
#line 7 "fps/arbitrary-fps.hpp"
template < typename mint >
void fps_set_fft_impl ( FormalPowerSeries < mint >* , FPSBackendPriority < 0 > ) {
FormalPowerSeries < mint >:: ntt_ptr = nullptr ;
}
template < typename mint >
void fps_ntt_impl ( FormalPowerSeries < mint >& , FPSBackendPriority < 0 > ) {
exit ( 1 );
}
template < typename mint >
void fps_intt_impl ( FormalPowerSeries < mint >& , FPSBackendPriority < 0 > ) {
exit ( 1 );
}
template < typename mint >
void fps_ntt_doubling_impl ( FormalPowerSeries < mint >& , FPSBackendPriority < 0 > ) {
exit ( 1 );
}
template < typename mint >
int fps_ntt_pr_impl ( FormalPowerSeries < mint >* , FPSBackendPriority < 0 > ) {
exit ( 1 );
}
template < typename mint >
FormalPowerSeries < mint >& fps_multiply_impl ( FormalPowerSeries < mint >& f ,
const FormalPowerSeries < mint >& r ,
FPSBackendPriority < 0 > ) {
auto ret = ArbitraryNTT :: multiply ( f , r );
return f = FormalPowerSeries < mint > ( ret . begin (), ret . end ());
}
template < typename mint >
FormalPowerSeries < mint > fps_inv_impl ( const FormalPowerSeries < mint >& f , int deg ,
FPSBackendPriority < 0 > ) {
assert ( f [ 0 ] != mint ( 0 ));
if ( deg == - 1 ) deg = f . size ();
FormalPowerSeries < mint > ret ({ mint ( 1 ) / f [ 0 ]});
for ( int i = 1 ; i < deg ; i <<= 1 )
ret = ( ret + ret - ret * ret * f . pre ( i << 1 )). pre ( i << 1 );
return ret . pre ( deg );
}
template < typename mint >
FormalPowerSeries < mint > fps_exp_impl ( const FormalPowerSeries < mint >& f , int deg ,
FPSBackendPriority < 0 > ) {
assert ( f . size () == 0 || f [ 0 ] == mint ( 0 ));
if ( deg == - 1 ) deg = ( int ) f . size ();
FormalPowerSeries < mint > ret ({ mint ( 1 )});
for ( int i = 1 ; i < deg ; i <<= 1 ) {
ret = ( ret * ( f . pre ( i << 1 ) + mint ( 1 ) - ret . log ( i << 1 ))). pre ( i << 1 );
}
return ret . pre ( deg );
}
#line 2 "internal/internal-math.hpp"
#line 2 "internal/internal-type-traits.hpp"
#line 4 "internal/internal-type-traits.hpp"
using namespace std ;
namespace nyaan_internal {
template < typename T >
using is_broadly_integral =
typename conditional_t < is_integral_v < T > || is_same_v < T , __int128_t > ||
is_same_v < T , __uint128_t > ,
true_type , false_type >:: type ;
template < typename T >
using is_broadly_signed =
typename conditional_t < is_signed_v < T > || is_same_v < T , __int128_t > ,
true_type , false_type >:: type ;
template < typename T >
using is_broadly_unsigned =
typename conditional_t < is_unsigned_v < T > || is_same_v < T , __uint128_t > ,
true_type , false_type >:: type ;
#define ENABLE_VALUE(x) \
template <typename T> \
constexpr bool x##_v = x<T>::value;
ENABLE_VALUE ( is_broadly_integral );
ENABLE_VALUE ( is_broadly_signed );
ENABLE_VALUE ( is_broadly_unsigned );
#undef ENABLE_VALUE
#define ENABLE_HAS_TYPE(var) \
template <class, class = void> \
struct has_##var : false_type {}; \
template <class T> \
struct has_##var<T, void_t<typename T::var>> : true_type {}; \
template <class T> \
constexpr auto has_##var##_v = has_##var<T>::value;
#define ENABLE_HAS_VAR(var) \
template <class, class = void> \
struct has_##var : false_type {}; \
template <class T> \
struct has_##var<T, void_t<decltype(T::var)>> : true_type {}; \
template <class T> \
constexpr auto has_##var##_v = has_##var<T>::value;
} // namespace nyaan_internal
#line 4 "internal/internal-math.hpp"
namespace nyaan_internal {
#line 9 "internal/internal-math.hpp"
using namespace std ;
// a mod p
template < typename T >
T safe_mod ( T a , T p ) {
a %= p ;
if constexpr ( is_broadly_signed_v < T > ) {
if ( a < 0 ) a += p ;
}
return a ;
}
// 返り値:pair(g, x)
// s.t. g = gcd(a, b), xa = g (mod b), 0 <= x < b/g
template < typename T >
pair < T , T > inv_gcd ( T a , T p ) {
static_assert ( is_broadly_signed_v < T > );
a = safe_mod ( a , p );
if ( a == 0 ) return { p , 0 };
T b = p , x = 1 , y = 0 ;
while ( a != 0 ) {
T q = b / a ;
swap ( a , b %= a );
swap ( x , y -= q * x );
}
if ( y < 0 ) y += p / b ;
return { b , y };
}
// 返り値 : a^{-1} mod p
// gcd(a, p) != 1 が必要
template < typename T >
T inv ( T a , T p ) {
static_assert ( is_broadly_signed_v < T > );
a = safe_mod ( a , p );
T b = p , x = 1 , y = 0 ;
while ( a != 0 ) {
T q = b / a ;
swap ( a , b %= a );
swap ( x , y -= q * x );
}
assert ( b == 1 );
return y < 0 ? y + p : y ;
}
// T : 底の型
// U : T*T がオーバーフローしない かつ 指数の型
template < typename T , typename U >
T modpow ( T a , U n , T p ) {
a = safe_mod ( a , p );
T ret = 1 % p ;
while ( n != 0 ) {
if ( n % 2 == 1 ) ret = U ( ret ) * a % p ;
a = U ( a ) * a % p ;
n /= 2 ;
}
return ret ;
}
// 返り値 : pair(rem, mod)
// 解なしのときは {0, 0} を返す
template < typename T >
pair < T , T > crt ( const vector < T >& r , const vector < T >& m ) {
static_assert ( is_broadly_signed_v < T > );
assert ( r . size () == m . size ());
int n = int ( r . size ());
T r0 = 0 , m0 = 1 ;
for ( int i = 0 ; i < n ; i ++ ) {
assert ( 1 <= m [ i ]);
T r1 = safe_mod ( r [ i ], m [ i ]), m1 = m [ i ];
if ( m0 < m1 ) swap ( r0 , r1 ), swap ( m0 , m1 );
if ( m0 % m1 == 0 ) {
if ( r0 % m1 != r1 ) return { 0 , 0 };
continue ;
}
auto [ g , im ] = inv_gcd ( m0 , m1 );
T u1 = m1 / g ;
if (( r1 - r0 ) % g ) return { 0 , 0 };
T x = ( r1 - r0 ) / g % u1 * im % u1 ;
r0 += x * m0 ;
m0 *= u1 ;
if ( r0 < 0 ) r0 += m0 ;
}
return { r0 , m0 };
}
} // namespace nyaan_internal
#line 2 "math/constexpr-primitive-root.hpp"
constexpr unsigned int constexpr_primitive_root ( unsigned int mod ) {
using u32 = unsigned int ;
using u64 = unsigned long long ;
if ( mod == 2 ) return 1 ;
u64 m = mod - 1 , ds [ 32 ] = {}, idx = 0 ;
for ( u64 i = 2 ; i * i <= m ; ++ i ) {
if ( m % i == 0 ) {
ds [ idx ++ ] = i ;
while ( m % i == 0 ) m /= i ;
}
}
if ( m != 1 ) ds [ idx ++ ] = m ;
for ( u32 _pr = 2 , flg = true ;; _pr ++ , flg = true ) {
for ( u32 i = 0 ; i < idx && flg ; ++ i ) {
u64 a = _pr , b = ( mod - 1 ) / ds [ i ], r = 1 ;
for (; b ; a = a * a % mod , b >>= 1 )
if ( b & 1 ) r = r * a % mod ;
if ( r == 1 ) flg = false ;
}
if ( flg == true ) return _pr ;
}
}
#line 2 "modint/arbitrary-modint.hpp"
#line 2 "modint/barrett-reduction.hpp"
#line 4 "modint/barrett-reduction.hpp"
using namespace std ;
struct Barrett {
using u32 = unsigned int ;
using i64 = long long ;
using u64 = unsigned long long ;
u32 m ;
u64 im ;
Barrett () : m (), im () {}
Barrett ( int n ) : m ( n ), im ( u64 ( - 1 ) / m + 1 ) {}
constexpr inline i64 quo ( u64 n ) {
u64 x = u64 (( __uint128_t ( n ) * im ) >> 64 );
u32 r = n - x * m ;
return m <= r ? x - 1 : x ;
}
constexpr inline i64 rem ( u64 n ) {
u64 x = u64 (( __uint128_t ( n ) * im ) >> 64 );
u32 r = n - x * m ;
return m <= r ? r + m : r ;
}
constexpr inline pair < i64 , int > quorem ( u64 n ) {
u64 x = u64 (( __uint128_t ( n ) * im ) >> 64 );
u32 r = n - x * m ;
if ( m <= r ) return { x - 1 , r + m };
return { x , r };
}
constexpr inline i64 pow ( u64 n , i64 p ) {
u32 a = rem ( n ), r = m == 1 ? 0 : 1 ;
while ( p ) {
if ( p & 1 ) r = rem ( u64 ( r ) * a );
a = rem ( u64 ( a ) * a );
p >>= 1 ;
}
return r ;
}
};
#line 4 "modint/arbitrary-modint.hpp"
template < int id >
struct ArbitraryModIntBase {
int x ;
ArbitraryModIntBase () : x ( 0 ) {}
ArbitraryModIntBase ( int64_t y ) {
int z = y % get_mod ();
if ( z < 0 ) z += get_mod ();
x = z ;
}
ArbitraryModIntBase & operator += ( const ArbitraryModIntBase & p ) {
if (( x += p . x ) >= get_mod ()) x -= get_mod ();
return * this ;
}
ArbitraryModIntBase & operator -= ( const ArbitraryModIntBase & p ) {
if (( x += get_mod () - p . x ) >= get_mod ()) x -= get_mod ();
return * this ;
}
ArbitraryModIntBase & operator *= ( const ArbitraryModIntBase & p ) {
x = rem (( unsigned long long ) x * p . x );
return * this ;
}
ArbitraryModIntBase & operator /= ( const ArbitraryModIntBase & p ) {
* this *= p . inverse ();
return * this ;
}
ArbitraryModIntBase operator - () const { return ArbitraryModIntBase ( - x ); }
ArbitraryModIntBase operator + () const { return * this ; }
ArbitraryModIntBase operator + ( const ArbitraryModIntBase & p ) const {
return ArbitraryModIntBase ( * this ) += p ;
}
ArbitraryModIntBase operator - ( const ArbitraryModIntBase & p ) const {
return ArbitraryModIntBase ( * this ) -= p ;
}
ArbitraryModIntBase operator * ( const ArbitraryModIntBase & p ) const {
return ArbitraryModIntBase ( * this ) *= p ;
}
ArbitraryModIntBase operator / ( const ArbitraryModIntBase & p ) const {
return ArbitraryModIntBase ( * this ) /= p ;
}
bool operator == ( const ArbitraryModIntBase & p ) const { return x == p . x ; }
bool operator != ( const ArbitraryModIntBase & p ) const { return x != p . x ; }
ArbitraryModIntBase inverse () const {
int a = x , b = get_mod (), u = 1 , v = 0 , t ;
while ( b > 0 ) {
t = a / b ;
swap ( a -= t * b , b );
swap ( u -= t * v , v );
}
return ArbitraryModIntBase ( u );
}
ArbitraryModIntBase pow ( int64_t n ) const {
ArbitraryModIntBase ret ( 1 ), mul ( x );
while ( n > 0 ) {
if ( n & 1 ) ret *= mul ;
mul *= mul ;
n >>= 1 ;
}
return ret ;
}
friend ostream & operator << ( ostream & os , const ArbitraryModIntBase & p ) {
return os << p . x ;
}
friend istream & operator >> ( istream & is , ArbitraryModIntBase & a ) {
int64_t t ;
is >> t ;
a = ArbitraryModIntBase ( t );
return ( is );
}
int get () const { return x ; }
inline unsigned int rem ( unsigned long long p ) { return barrett (). rem ( p ); }
static inline Barrett & barrett () {
static Barrett b ;
return b ;
}
static inline int & get_mod () {
static int mod = 0 ;
return mod ;
}
static void set_mod ( int md ) {
assert ( 0 < md && md <= ( 1LL << 30 ) - 1 );
get_mod () = md ;
barrett () = Barrett ( md );
}
};
using ArbitraryModInt = ArbitraryModIntBase <- 1 > ;
/**
* @brief modint (2^{30} 未満の任意 mod 用)
*/
#line 2 "prime/fast-factorize.hpp"
#line 6 "prime/fast-factorize.hpp"
using namespace std ;
#line 2 "misc/rng.hpp"
#line 7 "misc/rng.hpp"
using namespace std ;
#line 2 "internal/internal-seed.hpp"
#line 4 "internal/internal-seed.hpp"
using namespace std ;
namespace nyaan_internal {
unsigned long long non_deterministic_seed () {
unsigned long long m =
chrono :: duration_cast < chrono :: nanoseconds > (
chrono :: high_resolution_clock :: now (). time_since_epoch ())
. count ();
m ^= 9845834732710364265uLL ;
m ^= m << 24 , m ^= m >> 31 , m ^= m << 35 ;
return m ;
}
unsigned long long deterministic_seed () { return 88172645463325252UL ; }
// 64 bit の seed 値を生成 (手元では seed 固定)
// 連続で呼び出すと同じ値が何度も返ってくるので注意
// #define RANDOMIZED_SEED するとシードがランダムになる
unsigned long long seed () {
#if defined(NyaanLocal) && !defined(RANDOMIZED_SEED)
return deterministic_seed ();
#else
return non_deterministic_seed ();
#endif
}
} // namespace nyaan_internal
#line 10 "misc/rng.hpp"
namespace my_rand {
using i64 = long long ;
using u64 = unsigned long long ;
// [0, 2^64 - 1)
u64 rng () {
static u64 _x = nyaan_internal :: seed ();
return _x ^= _x << 7 , _x ^= _x >> 9 ;
}
// [l, r]
i64 rng ( i64 l , i64 r ) {
assert ( l <= r );
return l + rng () % u64 ( r - l + 1 );
}
// [l, r)
i64 randint ( i64 l , i64 r ) {
assert ( l < r );
return l + rng () % u64 ( r - l );
}
// choose n numbers from [l, r) without overlapping
vector < i64 > randset ( i64 l , i64 r , i64 n ) {
assert ( l <= r && n <= r - l );
unordered_set < i64 > s ;
for ( i64 i = n ; i ; -- i ) {
i64 m = randint ( l , r + 1 - i );
if ( s . find ( m ) != s . end ()) m = r - i ;
s . insert ( m );
}
vector < i64 > ret ;
for ( auto & x : s ) ret . push_back ( x );
sort ( begin ( ret ), end ( ret ));
return ret ;
}
// [0.0, 1.0)
double rnd () { return rng () * 5.42101086242752217004e-20 ; }
// [l, r)
double rnd ( double l , double r ) {
assert ( l < r );
return l + rnd () * ( r - l );
}
template < typename T >
void randshf ( vector < T >& v ) {
int n = v . size ();
for ( int i = 1 ; i < n ; i ++ ) swap ( v [ i ], v [ randint ( 0 , i + 1 )]);
}
} // namespace my_rand
using my_rand :: randint ;
using my_rand :: randset ;
using my_rand :: randshf ;
using my_rand :: rnd ;
using my_rand :: rng ;
#line 2 "modint/arbitrary-montgomery-modint.hpp"
#line 4 "modint/arbitrary-montgomery-modint.hpp"
using namespace std ;
template < typename Int , typename UInt , typename Long , typename ULong , int id >
struct ArbitraryLazyMontgomeryModIntBase {
using mint = ArbitraryLazyMontgomeryModIntBase ;
inline static UInt mod ;
inline static UInt r ;
inline static UInt n2 ;
static constexpr int bit_length = sizeof ( UInt ) * 8 ;
static UInt get_r () {
UInt ret = mod ;
while ( mod * ret != 1 ) ret *= UInt ( 2 ) - mod * ret ;
return ret ;
}
static void set_mod ( UInt m ) {
assert ( m < ( UInt ( 1u ) << ( bit_length - 2 )));
assert (( m & 1 ) == 1 );
mod = m , n2 = - ULong ( m ) % m , r = get_r ();
}
UInt a ;
ArbitraryLazyMontgomeryModIntBase () : a ( 0 ) {}
ArbitraryLazyMontgomeryModIntBase ( const Long & b )
: a ( reduce ( ULong ( b % mod + mod ) * n2 )){};
static UInt reduce ( const ULong & b ) {
return ( b + ULong ( UInt ( b ) * UInt ( - r )) * mod ) >> bit_length ;
}
mint & operator += ( const mint & b ) {
if ( Int ( a += b . a - 2 * mod ) < 0 ) a += 2 * mod ;
return * this ;
}
mint & operator -= ( const mint & b ) {
if ( Int ( a -= b . a ) < 0 ) a += 2 * mod ;
return * this ;
}
mint & operator *= ( const mint & b ) {
a = reduce ( ULong ( a ) * b . a );
return * this ;
}
mint & operator /= ( const mint & b ) {
* this *= b . inverse ();
return * this ;
}
mint operator + ( const mint & b ) const { return mint ( * this ) += b ; }
mint operator - ( const mint & b ) const { return mint ( * this ) -= b ; }
mint operator * ( const mint & b ) const { return mint ( * this ) *= b ; }
mint operator / ( const mint & b ) const { return mint ( * this ) /= b ; }
bool operator == ( const mint & b ) const {
return ( a >= mod ? a - mod : a ) == ( b . a >= mod ? b . a - mod : b . a );
}
bool operator != ( const mint & b ) const {
return ( a >= mod ? a - mod : a ) != ( b . a >= mod ? b . a - mod : b . a );
}
mint operator - () const { return mint ( 0 ) - mint ( * this ); }
mint operator + () const { return mint ( * this ); }
mint pow ( ULong n ) const {
mint ret ( 1 ), mul ( * this );
while ( n > 0 ) {
if ( n & 1 ) ret *= mul ;
mul *= mul , n >>= 1 ;
}
return ret ;
}
friend ostream & operator << ( ostream & os , const mint & b ) {
return os << b . get ();
}
friend istream & operator >> ( istream & is , mint & b ) {
Long t ;
is >> t ;
b = ArbitraryLazyMontgomeryModIntBase ( t );
return ( is );
}
mint inverse () const {
Int x = get (), y = get_mod (), u = 1 , v = 0 ;
while ( y > 0 ) {
Int t = x / y ;
swap ( x -= t * y , y );
swap ( u -= t * v , v );
}
return mint { u };
}
UInt get () const {
UInt ret = reduce ( a );
return ret >= mod ? ret - mod : ret ;
}
static UInt get_mod () { return mod ; }
};
// id に適当な乱数を割り当てて使う
template < int id >
using ArbitraryLazyMontgomeryModInt =
ArbitraryLazyMontgomeryModIntBase < int , unsigned int , long long ,
unsigned long long , id > ;
template < int id >
using ArbitraryLazyMontgomeryModInt64bit =
ArbitraryLazyMontgomeryModIntBase < long long , unsigned long long , __int128_t ,
__uint128_t , id > ;
#line 2 "prime/miller-rabin.hpp"
#line 4 "prime/miller-rabin.hpp"
using namespace std ;
#line 8 "prime/miller-rabin.hpp"
namespace fast_factorize {
template < typename T , typename U >
bool miller_rabin ( const T & n , vector < T > ws ) {
if ( n <= 2 ) return n == 2 ;
if ( n % 2 == 0 ) return false ;
T d = n - 1 ;
while ( d % 2 == 0 ) d /= 2 ;
U e = 1 , rev = n - 1 ;
for ( T w : ws ) {
if ( w % n == 0 ) continue ;
T t = d ;
U y = nyaan_internal :: modpow < T , U > ( w , t , n );
while ( t != n - 1 && y != e && y != rev ) y = y * y % n , t *= 2 ;
if ( y != rev && t % 2 == 0 ) return false ;
}
return true ;
}
bool miller_rabin_u64 ( unsigned long long n ) {
return miller_rabin < unsigned long long , __uint128_t > (
n , { 2 , 325 , 9375 , 28178 , 450775 , 9780504 , 1795265022 });
}
template < typename mint >
bool miller_rabin ( unsigned long long n , vector < unsigned long long > ws ) {
if ( n <= 2 ) return n == 2 ;
if ( n % 2 == 0 ) return false ;
if ( mint :: get_mod () != n ) mint :: set_mod ( n );
unsigned long long d = n - 1 ;
while ( ~ d & 1 ) d >>= 1 ;
mint e = 1 , rev = n - 1 ;
for ( unsigned long long w : ws ) {
if ( w % n == 0 ) continue ;
unsigned long long t = d ;
mint y = mint ( w ). pow ( t );
while ( t != n - 1 && y != e && y != rev ) y *= y , t *= 2 ;
if ( y != rev && t % 2 == 0 ) return false ;
}
return true ;
}
bool is_prime ( unsigned long long n ) {
using mint32 = ArbitraryLazyMontgomeryModInt < 96229631 > ;
using mint64 = ArbitraryLazyMontgomeryModInt64bit < 622196072 > ;
if ( n <= 2 ) return n == 2 ;
if ( n % 2 == 0 ) return false ;
if ( n < ( 1uLL << 30 )) {
return miller_rabin < mint32 > ( n , { 2 , 7 , 61 });
} else if ( n < ( 1uLL << 62 )) {
return miller_rabin < mint64 > (
n , { 2 , 325 , 9375 , 28178 , 450775 , 9780504 , 1795265022 });
} else {
return miller_rabin_u64 ( n );
}
}
} // namespace fast_factorize
using fast_factorize :: is_prime ;
/**
* @brief Miller-Rabin primality test
*/
#line 12 "prime/fast-factorize.hpp"
namespace fast_factorize {
using u64 = uint64_t ;
template < typename mint , typename T >
T pollard_rho ( T n ) {
if ( ~ n & 1 ) return 2 ;
if ( is_prime ( n )) return n ;
if ( mint :: get_mod () != n ) mint :: set_mod ( n );
mint R , one = 1 ;
auto f = [ & ]( mint x ) { return x * x + R ; };
auto rnd_ = [ & ]() { return rng () % ( n - 2 ) + 2 ; };
while ( 1 ) {
mint x , y , ys , q = one ;
R = rnd_ (), y = rnd_ ();
T g = 1 ;
constexpr int m = 128 ;
for ( int r = 1 ; g == 1 ; r <<= 1 ) {
x = y ;
for ( int i = 0 ; i < r ; ++ i ) y = f ( y );
for ( int k = 0 ; g == 1 && k < r ; k += m ) {
ys = y ;
for ( int i = 0 ; i < m && i < r - k ; ++ i ) q *= x - ( y = f ( y ));
g = gcd ( q . get (), n );
}
}
if ( g == n ) do
g = gcd (( x - ( ys = f ( ys ))). get (), n );
while ( g == 1 );
if ( g != n ) return g ;
}
exit ( 1 );
}
using i64 = long long ;
vector < i64 > inner_factorize ( u64 n ) {
using mint32 = ArbitraryLazyMontgomeryModInt < 452288976 > ;
using mint64 = ArbitraryLazyMontgomeryModInt64bit < 401243123 > ;
if ( n <= 1 ) return {};
u64 p ;
if ( n <= ( 1LL << 30 )) {
p = pollard_rho < mint32 , uint32_t > ( n );
} else if ( n <= ( 1LL << 62 )) {
p = pollard_rho < mint64 , uint64_t > ( n );
} else {
exit ( 1 );
}
if ( p == n ) return { i64 ( p )};
auto l = inner_factorize ( p );
auto r = inner_factorize ( n / p );
copy ( begin ( r ), end ( r ), back_inserter ( l ));
return l ;
}
vector < i64 > factorize ( u64 n ) {
auto ret = inner_factorize ( n );
sort ( begin ( ret ), end ( ret ));
return ret ;
}
map < i64 , i64 > factor_count ( u64 n ) {
map < i64 , i64 > mp ;
for ( auto & x : factorize ( n )) mp [ x ] ++ ;
return mp ;
}
vector < i64 > divisors ( u64 n ) {
if ( n == 0 ) return {};
vector < pair < i64 , i64 >> v ;
for ( auto & p : factorize ( n )) {
if ( v . empty () || v . back (). first != p ) {
v . emplace_back ( p , 1 );
} else {
v . back (). second ++ ;
}
}
vector < i64 > ret ;
auto f = [ & ]( auto rc , int i , i64 x ) -> void {
if ( i == ( int ) v . size ()) {
ret . push_back ( x );
return ;
}
rc ( rc , i + 1 , x );
for ( int j = 0 ; j < v [ i ]. second ; j ++ ) rc ( rc , i + 1 , x *= v [ i ]. first );
};
f ( f , 0 , 1 );
sort ( begin ( ret ), end ( ret ));
return ret ;
}
} // namespace fast_factorize
using fast_factorize :: divisors ;
using fast_factorize :: factor_count ;
using fast_factorize :: factorize ;
/**
* @brief 高速素因数分解(Miller Rabin/Pollard's Rho)
*/
#line 2 "ntt/chirp-z.hpp"
// f(A W^0), f(A W^1), ..., f(A W^{N-1}) を返す
template < typename fps >
fps ChirpZ ( fps f , typename fps :: value_type W , int N = - 1 ,
typename fps :: value_type A = 1 ) {
using mint = typename fps :: value_type ;
if ( N == - 1 ) N = f . size ();
if ( f . empty () or N == 0 ) return fps ( N , mint {});
int M = f . size ();
if ( A != 1 ) {
mint x = 1 ;
for ( int i = 0 ; i < M ; i ++ ) f [ i ] *= x , x *= A ;
}
if ( W == 0 ) {
fps F ( N , f [ 0 ]);
for ( int i = 1 ; i < M ; i ++ ) F [ 0 ] += f [ i ];
return F ;
}
fps wc ( N + M ), iwc ( max ( N , M ));
mint ws = 1 , iW = W . inverse (), iws = 1 ;
wc [ 0 ] = 1 , iwc [ 0 ] = 1 ;
for ( int i = 1 ; i < N + M ; i ++ ) wc [ i ] = ws * wc [ i - 1 ], ws *= W ;
for ( int i = 1 ; i < max ( N , M ); i ++ ) iwc [ i ] = iws * iwc [ i - 1 ], iws *= iW ;
for ( int i = 0 ; i < M ; i ++ ) f [ i ] *= iwc [ i ];
reverse ( begin ( f ), end ( f ));
fps g = f * wc ;
fps F { begin ( g ) + M - 1 , begin ( g ) + M + N - 1 };
for ( int i = 0 ; i < N ; i ++ ) F [ i ] *= iwc [ i ];
return F ;
}
/**
* @brief Chirp Z-transform(Bluestein's algorithm)
*/
#line 2 "ntt/multidimensional-ntt.hpp"
#line 7 "ntt/multidimensional-ntt.hpp"
using namespace std ;
#line 2 "internal/internal-function.hpp"
#line 8 "internal/internal-function.hpp"
namespace nyaan_internal {
template < class >
class function_ref ;
template < class R , class ... Args >
class function_ref < R ( Args ...) > {
void * obj_ = nullptr ;
R ( * call_obj_ )( void * , Args ...) = nullptr ;
R ( * func_ )( Args ...) = nullptr ;
public:
function_ref () noexcept = default ;
function_ref ( std :: nullptr_t ) noexcept {}
function_ref ( R ( * f )( Args ...)) noexcept : func_ ( f ) {}
template <
class F , class Fn = std :: remove_reference_t < F >,
class = std :: enable_if_t <
std :: is_lvalue_reference_v < F &&> &&
! std :: is_same_v < std :: decay_t < F > , function_ref > &&
! std :: is_pointer_v < std :: decay_t < F >> && ! std :: is_function_v < Fn > &&
std :: is_invocable_r_v < R , Fn & , Args ... >>>
function_ref ( F && f ) noexcept {
obj_ = const_cast < void *> ( static_cast < const void *> ( std :: addressof ( f )));
call_obj_ = []( void * p , Args ... args ) -> R {
return std :: invoke ( * static_cast < Fn *> ( p ), std :: forward < Args > ( args )...);
};
}
R operator ()( Args ... args ) const {
if ( call_obj_ ) {
return call_obj_ ( obj_ , std :: forward < Args > ( args )...);
}
if ( ! func_ ) throw std :: bad_function_call ();
return func_ ( std :: forward < Args > ( args )...);
}
explicit operator bool () const noexcept {
return call_obj_ != nullptr || func_ != nullptr ;
}
};
template < class , std :: size_t Capacity = 32 ,
std :: size_t Align = alignof ( std :: max_align_t )>
class inplace_function ;
template < class R , class ... Args , std :: size_t Capacity , std :: size_t Align >
class inplace_function < R ( Args ...), Capacity , Align > {
using storage_t = typename std :: aligned_storage < Capacity , Align >:: type ;
storage_t storage_ ;
R ( * invoke_ )( void * , Args && ...) = nullptr ;
void ( * copy_ )( void * , const void * ) = nullptr ;
void ( * move_ )( void * , void * ) = nullptr ;
void ( * destroy_ )( void * ) = nullptr ;
template < class F >
static R invoke_impl ( void * p , Args && ... args ) {
return std :: invoke ( * static_cast < F *> ( p ), std :: forward < Args > ( args )...);
}
template < class F >
static void copy_impl ( void * dst , const void * src ) {
new ( dst ) F ( * static_cast < const F *> ( src ));
}
template < class F >
static void move_impl ( void * dst , void * src ) {
if constexpr ( std :: is_move_constructible_v < F > ) {
new ( dst ) F ( std :: move ( * static_cast < F *> ( src )));
} else {
new ( dst ) F ( * static_cast < F *> ( src ));
}
}
template < class F >
static void destroy_impl ( void * p ) {
static_cast < F *> ( p ) ->~ F ();
}
template < class F >
void emplace ( F && f ) {
using Fn = std :: decay_t < F > ;
static_assert ( std :: is_invocable_r_v < R , Fn & , Args ... > ,
"inplace_function target is not invocable with this signature" );
static_assert ( sizeof ( Fn ) <= Capacity ,
"inplace_function target is too large; increase Capacity" );
static_assert ( alignof ( Fn ) <= Align ,
"inplace_function target alignment is too strict; increase Align" );
static_assert ( std :: is_copy_constructible_v < Fn > ,
"inplace_function target must be copy constructible" );
if constexpr ( std :: is_pointer_v < Fn > ) {
if ( f == nullptr ) return ;
}
if constexpr ( std :: is_move_constructible_v < Fn > ||
std :: is_lvalue_reference_v < F > ) {
new ( & storage_ ) Fn ( std :: forward < F > ( f ));
} else {
new ( & storage_ ) Fn ( f );
}
invoke_ = & invoke_impl < Fn > ;
copy_ = & copy_impl < Fn > ;
move_ = & move_impl < Fn > ;
destroy_ = & destroy_impl < Fn > ;
}
public:
inplace_function () noexcept = default ;
inplace_function ( std :: nullptr_t ) noexcept {}
~ inplace_function () { reset (); }
inplace_function ( const inplace_function & other ) {
if ( other ) {
other . copy_ ( & storage_ , & other . storage_ );
invoke_ = other . invoke_ ;
copy_ = other . copy_ ;
move_ = other . move_ ;
destroy_ = other . destroy_ ;
}
}
inplace_function ( inplace_function && other ) {
if ( other ) {
other . move_ ( & storage_ , & other . storage_ );
invoke_ = other . invoke_ ;
copy_ = other . copy_ ;
move_ = other . move_ ;
destroy_ = other . destroy_ ;
other . reset ();
}
}
template <
class F , class Fn = std :: decay_t < F >,
class = std :: enable_if_t <! std :: is_same_v < Fn , inplace_function > &&
! std :: is_same_v < Fn , std :: nullptr_t >>>
inplace_function ( F && f ) {
emplace ( std :: forward < F > ( f ));
}
inplace_function & operator = ( const inplace_function & other ) {
if ( this == & other ) return * this ;
reset ();
if ( other ) {
other . copy_ ( & storage_ , & other . storage_ );
invoke_ = other . invoke_ ;
copy_ = other . copy_ ;
move_ = other . move_ ;
destroy_ = other . destroy_ ;
}
return * this ;
}
inplace_function & operator = ( inplace_function && other ) {
if ( this == & other ) return * this ;
reset ();
if ( other ) {
other . move_ ( & storage_ , & other . storage_ );
invoke_ = other . invoke_ ;
copy_ = other . copy_ ;
move_ = other . move_ ;
destroy_ = other . destroy_ ;
other . reset ();
}
return * this ;
}
template <
class F , class Fn = std :: decay_t < F >,
class = std :: enable_if_t <! std :: is_same_v < Fn , inplace_function > &&
! std :: is_same_v < Fn , std :: nullptr_t >>>
inplace_function & operator = ( F && f ) {
reset ();
emplace ( std :: forward < F > ( f ));
return * this ;
}
inplace_function & operator = ( std :: nullptr_t ) noexcept {
reset ();
return * this ;
}
void reset () noexcept {
if ( destroy_ ) destroy_ ( & storage_ );
invoke_ = nullptr ;
copy_ = nullptr ;
move_ = nullptr ;
destroy_ = nullptr ;
}
explicit operator bool () const noexcept { return invoke_ != nullptr ; }
R operator ()( Args ... args ) const {
if ( ! invoke_ ) throw std :: bad_function_call ();
return invoke_ (
const_cast < void *> ( static_cast < const void *> ( & storage_ )),
std :: forward < Args > ( args )...);
}
};
} // namespace nyaan_internal
using nyaan_internal :: function_ref ;
using nyaan_internal :: inplace_function ;
#line 10 "ntt/multidimensional-ntt.hpp"
// f(vector<mint>& a, bool rev) : 1 次元 DFT (rev は逆変換かどうか)
template < typename T ,
typename DFT = nyaan_internal :: inplace_function < void ( vector < T > & , bool ), 64 >>
struct MultidimensionalFourierTransform {
static_assert ( is_invocable_r_v < void , DFT & , vector < T >& , bool > ,
"DFT must be callable as void(vector<T>&, bool)" );
vector < int > base ;
DFT dft1d ;
template < typename F >
MultidimensionalFourierTransform ( const vector < int >& bs , F && f )
: base ( bs ), dft1d ( std :: forward < F > ( f )) {}
bool ascend ( vector < int >& v ) {
int i = 0 ;
v [ i ] += 1 ;
while ( v [ i ] == base [ i ]) {
if ( i == ( int ) v . size () - 1 ) return false ;
v [ i ] = 0 ;
v [ ++ i ] += 1 ;
}
return true ;
}
int operator ()( vector < int >& a ) {
int res = a [ 0 ], coeff = 1 ;
for ( int i = 1 ; i < ( int ) a . size (); i ++ )
coeff *= base [ i - 1 ], res += coeff * a [ i ];
return res ;
}
void inner ( vector < T >& a , int dim , bool rev = false ) {
int i = 0 , shift = 1 , n = base [ dim ];
vector < T > f ( n );
vector < int > id ( base . size ());
for ( int j = 0 ; j < dim ; j ++ ) shift *= base [ j ];
do {
if ( id [ dim ] != 0 ) continue ;
for ( int j = 0 , t = i ; j < n ; j ++ , t += shift ) f [ j ] = a [ t ];
std :: invoke ( dft1d , f , rev );
for ( int j = 0 , t = i ; j < n ; j ++ , t += shift ) a [ t ] = f [ j ];
id [ dim ] = 0 ;
} while ( ++ i && ascend ( id ));
}
void fft ( vector < T >& a , bool rev = false ) {
if ( ! rev )
for ( int i = 0 ; i < ( int ) base . size (); i ++ ) inner ( a , i );
else
for ( int i = ( int ) base . size (); i -- ;) inner ( a , i , true );
}
};
/**
* @brief 多次元FFT
*/
#line 15 "ntt/multivariate-circular-convolution.hpp"
template < typename mint >
FormalPowerSeries < mint > multivariate_circular_convolution (
const FormalPowerSeries < mint >& f , const FormalPowerSeries < mint >& g ,
const vector < int >& base ) {
int prod = 1 ;
for ( auto & b : base ) prod *= b ;
assert (( int ) f . size () == prod && ( int ) g . size () == prod );
vector < int > primes ;
for ( int p = 900000000 / prod * prod + 1 ; ( int ) primes . size () < 3 ; p += prod ) {
if ( is_prime ( p )) primes . push_back ( p );
}
vector < vector < int >> buf ;
using submint = ArbitraryModIntBase < 20230528 > ;
for ( int p : primes ) {
submint :: set_mod ( p );
int proot = constexpr_primitive_root ( p );
unordered_map < int , pair < submint , submint >> len_to_W ;
for ( auto & b : base ) {
submint w = submint { proot }. pow (( p - 1 ) / b );
submint iw = w . inverse ();
len_to_W [ b ] = { w , iw };
}
FormalPowerSeries < submint > s ( prod ), t ( prod );
for ( int i = 0 ; i < prod ; i ++ ) s [ i ] = f [ i ]. get (), t [ i ] = g [ i ]. get ();
auto dft = [ & ]( vector < submint >& v , bool rev ) -> void {
auto & val = len_to_W [ v . size ()];
submint w = rev ? val . second : val . first ;
auto res = ChirpZ < FormalPowerSeries < submint >> ({ begin ( v ), end ( v )}, w );
v = vector < submint > { begin ( res ), end ( res )};
};
MultidimensionalFourierTransform < submint > mdft ( base , dft );
mdft . fft ( s ), mdft . fft ( t );
for ( int i = 0 ; i < prod ; i ++ ) s [ i ] *= t [ i ];
mdft . fft ( s , true );
submint iprod = submint { prod }. inverse ();
vector < int > res ;
for ( auto & x : s ) res . push_back (( x * iprod ). get ());
buf . push_back ( res );
}
FormalPowerSeries < mint > h ;
auto m = mint :: get_mod ();
vector < __int128_t > rem ( 3 ), mod ( 3 );
for ( int j = 0 ; j < 3 ; j ++ ) mod [ j ] = primes [ j ];
for ( int i = 0 ; i < prod ; i ++ ) {
for ( int j = 0 ; j < 3 ; j ++ ) rem [ j ] = buf [ j ][ i ];
h . push_back ( nyaan_internal :: crt ( rem , mod ). first % m );
}
return h ;
}
/**
* @brief 多変数巡回畳み込み
*/
#line 6 "verify/verify-yosupo-ntt/yosupo-multivariate-circular-convolution.test.cpp"
//
using namespace Nyaan ;
void q () {
using mint = ArbitraryModInt ;
using fps = FormalPowerSeries < mint > ;
inl ( p , K );
mint :: set_mod ( p );
vi base ( K );
in ( base );
int prod = 1 ;
each ( x , base ) prod *= x ;
fps f ( prod ), g ( prod );
in ( f , g );
out ( multivariate_circular_convolution ( f , g , base ));
}
void Nyaan :: solve () {
int t = 1 ;
// in(t);
while ( t -- ) q ();
}