# RQ-VAE Semantic-ID (SID) generation, contrastive / dual-view variant.
#
# Example config wired to the small Parquet sample under
# data/sid_example/merged (the directory both input paths below point at).
# NOTE(review): an earlier revision of this header named
# data/sid_example_v2/merged; confirm which directory actually ships the
# `pair_embedding` and `is_pair` columns this variant reads.
#
# Production scale (full data) uses batch_size: 2048 and codebook: 8192 x3.
#
# Formatting: one field per line. In protobuf text format a `#` comment runs
# to the end of the physical line, so comments must never share a line with
# fields that follow them.

train_input_path: "data/sid_example/merged/*.parquet"
eval_input_path: "data/sid_example/merged/*.parquet"
model_dir: "experiments/sid_rqvae_contrastive"

train_config {
  # NOTE(review): every feature declared in this file is a raw_feature, so
  # there are presumably no embedding tables for the sparse optimizer to
  # update; confirm. Its schedule spans warmup 10 + T_max 50 = 60 epochs,
  # which is shorter than num_epochs (150).
  sparse_optimizer {
    adam_optimizer {
      lr: 0.002
      beta1: 0.9
      beta2: 0.999
      weight_decay: 0.0001
    }
    cosine_annealing_learning_rate {
      T_max: 50
      warmup_size: 10
      warmup_learning_rate: 0.0
      min_learning_rate: 1e-7
      by_epoch: true
    }
  }

  # warmup_size 40 + T_max 110 = 150, matching num_epochs below. Presumably
  # the scheduler counts T_max after warmup; verify against the scheduler
  # implementation before changing either value.
  dense_optimizer {
    adamw_optimizer {
      lr: 0.002
      beta1: 0.9
      beta2: 0.999
      weight_decay: 0.0001
    }
    cosine_annealing_learning_rate {
      T_max: 110
      warmup_size: 40
      warmup_learning_rate: 0.0
      min_learning_rate: 1e-7
      by_epoch: true
    }
  }

  num_epochs: 150
  save_checkpoints_epochs: 10
  log_step_count_steps: 100
  is_profiling: false
}

data_config {
  # Small-sample batch size; see the production note in the file header.
  batch_size: 512
  dataset_type: ParquetDataset
  fg_mode: FG_DAG
  num_workers: 4
}

# Primary item embedding (512 values per row).
feature_configs {
  raw_feature {
    feature_name: "emb"
    expression: "item:embedding"
    value_dim: 512
  }
}

# Paired-view embedding, same width as "emb".
feature_configs {
  raw_feature {
    feature_name: "pair_emb"
    expression: "item:pair_embedding"
    value_dim: 512
  }
}

# Scalar flag column; presumably marks rows that carry a valid pair (confirm
# against the data schema).
feature_configs {
  raw_feature {
    feature_name: "is_pair"
    expression: "item:is_pair"
    value_dim: 1
  }
}

model_config {
  # One feature group per input so the model can address each by name.
  feature_groups {
    group_name: "deep"
    feature_names: "emb"
    group_type: DEEP
  }
  feature_groups {
    group_name: "pair"
    feature_names: "pair_emb"
    group_type: DEEP
  }
  feature_groups {
    group_name: "pair_flag"
    feature_names: "is_pair"
    group_type: DEEP
  }

  sid_rqvae {
    embed_dim: 64
    hidden_dims: 256
    hidden_dims: 256
    # Three codebook entries of size 256 (small-sample setting; the header
    # lists 8192 x3 for production).
    codebook: 256
    codebook: 256
    codebook: 256
    forward_mode: "ste"
    kmeans_init: false
    # These names must match the group_name values declared above.
    contrastive_config {
      pair_feature_group: "pair"
      pair_flag_feature_group: "pair_flag"
    }
  }

  losses {
    recon_loss {
      recon_type: "l2"
    }
  }
  # NOTE(review): two latent_weight entries versus three codebook entries
  # above. If latent_weight is meant to be per quantizer level, one entry is
  # missing; confirm against the loss proto definition.
  losses {
    commitment_loss {
      latent_weight: 0.5
      latent_weight: 0.5
    }
  }
  losses {
    contrastive_loss {}
  }
}