Hi everyone, I’m having an issue submitting my code for leaderboard scoring. The platform keeps showing:
“Your submission did not output the expected file, so it could not be scored. This may be due to an unhandled exception or syntax error.”
I reviewed the logs and confirmed my submission.zip includes main.py and the required model files, but the submission still fails with the same message.
Has anyone experienced this before? What is the exact expected output filename and path for the phonetic track, and what’s the best way to ensure it gets created correctly during scoring?
Thanks in advance.
main.py
```python
import json
import traceback
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List

import torch
import torchaudio
from transformers import AutoProcessor, WavLMForCTC
TARGET_SR = 16000  # sample rate (Hz) fed to the processor and resampler target
BATCH_SIZE = 16  # utterances per inference batch
def read_jsonl(path: Path):
    """Read a JSONL file and return a list of parsed objects.

    Blank lines are skipped. Raises json.JSONDecodeError on malformed rows.
    Note: the pasted original used curly quotes ("utf-8" etc.), which is a
    SyntaxError in Python — restored to plain ASCII string literals here.
    """
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
def write_jsonl(path: Path, rows):
    """Write an iterable of JSON-serializable rows to *path*, one per line.

    Creates parent directories as needed; ensure_ascii=False keeps non-ASCII
    characters (e.g. phonetic symbols) readable in the output file.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")
def load_audio(path: Path):
    """Load *path* as a 1-D float waveform tensor at TARGET_SR.

    Multi-channel audio is downmixed to mono by averaging channels, and any
    sample rate other than TARGET_SR is resampled. Returns a contiguous
    1-D tensor (channel dimension squeezed out).
    """
    wav, sr = torchaudio.load(str(path))
    # Downmix (channels, samples) -> mono before resampling.
    if wav.ndim == 2 and wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)
    if sr != TARGET_SR:
        wav = torchaudio.functional.resample(wav, sr, TARGET_SR)
    return wav.squeeze(0).contiguous()
@dataclass
class ModelBundle:
    """Container for the loaded processor, model, and inference device."""

    # NOTE: the original annotated this as the builtin `any` (a function, not
    # a type) — corrected to typing.Any, since AutoProcessor.from_pretrained
    # can return several processor classes.
    processor: Any
    model: torch.nn.Module
    device: torch.device
def load_model(model_dir: Path) -> ModelBundle:
    """Load the processor and WavLM-CTC model from *model_dir*.

    Moves the model to CUDA when available (CPU otherwise) and switches it to
    eval mode. The pasted original used a Unicode arrow (→) in the signature,
    which is a SyntaxError — restored to ``->``.
    """
    print("[debug] Loading processor from:", model_dir)
    processor = AutoProcessor.from_pretrained(str(model_dir))
    print("[debug] Loading model from:", model_dir)
    model = WavLMForCTC.from_pretrained(str(model_dir))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("[debug] Device:", device)
    model.to(device).eval()
    return ModelBundle(processor, model, device)
@torch.no_grad()
def predict_batch(bundle: ModelBundle, waves: List[torch.Tensor]):
    """Greedy-decode a batch of mono waveforms into text strings.

    *waves* are 1-D tensors at TARGET_SR (as produced by load_audio); the
    processor pads them into a batch. Returns one decoded string per input,
    in order.
    """
    inputs = bundle.processor(
        waves,
        sampling_rate=TARGET_SR,
        return_tensors="pt",
        padding=True,
    )
    input_values = inputs["input_values"].to(bundle.device)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(bundle.device)
    # fp16 autocast only when actually on CUDA; enabled=False makes this a
    # no-op on CPU, so the same code path works on both devices.
    with torch.autocast(
        device_type="cuda",
        dtype=torch.float16,
        enabled=(bundle.device.type == "cuda"),
    ):
        logits = bundle.model(input_values, attention_mask=attention_mask).logits
    # Greedy CTC decoding: argmax per frame, then collapse via batch_decode.
    pred_ids = torch.argmax(logits, dim=-1)
    return bundle.processor.batch_decode(pred_ids)
def main():
    """Transcribe every utterance listed in the metadata and write the
    leaderboard output to <cwd>/submission/submission.jsonl.

    Fixes vs. the pasted original: ``Path(file)`` raised NameError (``file``
    is undefined) — it must be ``Path(__file__)`` to locate the model folder
    shipped next to this script; curly quotes restored to ASCII.
    """
    work_dir = Path.cwd()  # /code_execution
    data_dir = work_dir / "data"
    # Model weights are bundled alongside this script, not under cwd.
    model_dir = Path(__file__).parent / "model"
    out_path = work_dir / "submission" / "submission.jsonl"
    print("[debug] CWD:", work_dir)
    print("[debug] data_dir exists:", data_dir.exists())
    print("[debug] model_dir exists:", model_dir.exists())
    print("[debug] writing to:", out_path)
    meta_path = data_dir / "utterance_metadata.jsonl"
    print("[debug] meta_path:", meta_path, "exists:", meta_path.exists())
    bundle = load_model(model_dir)
    metadata = read_jsonl(meta_path)
    print("[debug] num utterances:", len(metadata))
    results = []
    for i in range(0, len(metadata), BATCH_SIZE):
        batch = metadata[i:i + BATCH_SIZE]
        waves, ids = [], []
        for row in batch:
            ap = data_dir / row["audio_path"]
            waves.append(load_audio(ap))
            ids.append(row["utterance_id"])
        preds = predict_batch(bundle, waves)
        for uid, pred in zip(ids, preds):
            results.append({"utterance_id": uid, "phonetic_text": pred.strip()})
        # Progress line every 20 batches (and for the first batch).
        if i == 0 or (i + BATCH_SIZE) % (BATCH_SIZE * 20) == 0:
            print(f"[debug] processed {min(i + BATCH_SIZE, len(metadata))}/{len(metadata)}")
    write_jsonl(out_path, results)
    print("[done] wrote:", out_path, "size:", out_path.stat().st_size)
# The pasted original compared ``name == "main"`` — the markdown renderer
# stripped the double underscores. It must be the standard entry-point guard.
if __name__ == "__main__":
    try:
        main()
    except Exception:
        # Print the full traceback so the scoring logs show the real cause
        # (the platform otherwise only reports "did not output the expected
        # file"), then re-raise so the run still fails loudly.
        print("[FATAL] Unhandled exception:\n")
        print(traceback.format_exc())
        raise
```