Logo Pigsyy的博客

博客

标签
暂无

JOI 数据获取脚本

2025-06-26 22:28:29 By Pigsyy
import requests
import os
import zipfile
import shutil
from zipfile import ZipFile
from os.path import basename

# Base directory of the conversion. The script changes into it right away, so
# the relative ``JOI<year>`` folder names used by the processing loop resolve
# against it; the generated ``<index>.zip`` archives are also written here.
root = 'D:/脚本/JOI 数据/JOI 历年数据'
os.chdir(root)

# for year in range(2013, 2018):
#     os.mkdir('JOI' + str(year))
#     os.chdir('JOI' + str(year))
#     url = 'https://www2.ioi-jp.org/joi/' + str(year - 1)
#     url += '/' + str(year) + '-ho/joi' + str(year) + '-ho-data.zip'
#     with open('data.zip', 'wb') as f:
#         f.write(__import__ ("requests").get(url).content)
#     f = zipfile.ZipFile('data.zip', 'r')
#     for file in f.namelist():
#         f.extract(file, './')
#     f.close()
#     os.chdir(root)

# for year in range(2018, 2025):
#     os.mkdir('JOI' + str(year))
#     os.chdir('JOI' + str(year))
#     url = 'https://www2.ioi-jp.org/joi/' + str(year - 1)
#     url += '/' + str(year) + '-ho/' + str(year) + '-ho-data.zip'
#     with open('data.zip', 'wb') as f:
#         f.write(__import__ ("requests").get(url).content)
#     f = zipfile.ZipFile('data.zip', 'r')
#     for file in f.namelist():
#         f.extract(file, './')
#     f.close()
#     os.chdir(root)

# Special cases handled by this script:
# 1. Before 2020 the unpacked archive contains one extra folder level, so an
#    additional directory has to be entered.
# 2. The 2013 score file has no "Subtask" lines (it is written in Japanese);
#    the loop below starts at 2014.
# 3. Subtask dependencies need dedicated handling.
# 4. Before 2021 the time and memory limits have to be typed in by hand.

# Index of the last archive that already exists; generated archives are
# numbered problem_idx + 1, problem_idx + 2, ...
problem_idx = 117


def read_limit(score, key, start):
    """Return the integer written after ``key`` in ``score``, or None.

    ``key`` is searched from index ``start``. The number is expected two
    characters after the key (``key: 123``) and is read as a run of ASCII
    digits; 0 is returned when the key is present but no digit follows.
    None means the key was not found, so the caller has to ask the user.
    """
    key_position = score.find(key, start)
    if key_position == -1:
        return None

    position = key_position + len(key) + 2
    value = 0
    # Bounds check: the number may be the very last thing in the text.
    while position < len(score) and '0' <= score[position] <= '9':
        value = value * 10 + int(score[position])
        position += 1
    return value


def normalize_data_name(raw):
    """Turn one filtered entry of a subtask line into a test-name pattern.

    Everything from the first '.' on is dropped (the extension). A name that
    ends with a letter gets a trailing '-' so that it is treated as a numbered
    prefix. The empty string is returned unchanged; it stands for "every
    input file".
    """
    dot = raw.find('.')
    if dot != -1:
        raw = raw[:dot]
    if raw and raw[-1].isalpha():
        raw += '-'
    return raw


def parse_subtasks(score, problem_position):
    """Parse the ``Subtask`` lines that belong to one problem.

    Scanning starts at ``problem_position`` and stops at the next occurrence
    of ``-ho-`` (searched from ``problem_position + 10``), which is taken as
    the start of the following problem.

    Returns ``(scores, patterns)``: two parallel lists with one entry per
    subtask, in file order. ``scores[i]`` is the integer between the
    parentheses; ``patterns[i]`` is the set of normalized names listed after
    the colon (see ``normalize_data_name``).

    Raises ValueError when the text between the parentheses is not an integer.
    """
    scores = []
    patterns = []
    next_problem = score.find('-ho-', problem_position + 10)
    search_from = problem_position

    while True:
        found = score.find('Subtask', search_from)
        if found == -1 or (next_problem != -1 and found >= next_problem):
            break
        search_from = found + 1

        score_left = score.find('(', search_from) + 1
        score_right = score.find(')', search_from)
        scores.append(int(score[score_left:score_right]))

        # The data list runs from the colon to the end of the line, or to the
        # end of the text when the last line has no trailing newline.
        content_start = score.find(':', search_from) + 1
        content_end = score.find('\n', content_start)
        if content_end == -1:
            content_end = len(score)

        names = set()
        for piece in score[content_start:content_end].split(','):
            # Only letters, digits, '.' and '-' are kept, so wildcards such
            # as '*' and surrounding blanks disappear here.
            kept = ''.join(
                c for c in piece
                if c == '.' or c == '-' or c.isdigit() or c.isalpha()
            )
            names.add(normalize_data_name(kept))
        patterns.append(names)

    return scores, patterns


def expand_test_names(patterns, in_dir='./in/'):
    """Expand the patterns of one subtask into concrete test names.

    * ``''`` expands to every file in ``in_dir`` (extension removed);
    * a pattern ending in ``-`` expands to ``<pattern>01``, ``<pattern>02``,
      ... for as long as ``<in_dir>/<name>.txt`` exists;
    * anything else is taken as an exact test name.
    """
    expanded = set()
    for pattern in patterns:
        if not pattern:
            for file_name in os.listdir(in_dir):
                # rsplit keeps names without an extension intact.
                expanded.add(file_name.rsplit('.', 1)[0])
        elif pattern[-1] == '-':
            number = 1
            while os.path.exists(
                    os.path.join(in_dir, pattern + str(number).zfill(2) + '.txt')):
                expanded.add(pattern + str(number).zfill(2))
                number += 1
        else:
            expanded.add(pattern)
    return expanded


def resolve_dependencies(full_sets):
    """Derive subtask dependencies from the complete test sets.

    ``full_sets[i]`` is the complete set of tests of subtask ``i + 1``.
    Subtask ``i`` depends on every earlier subtask whose *complete* set is
    contained in its own; the tests of those subtasks are removed from it.
    Comparing complete sets (rather than already-reduced ones) prevents a
    dependency from being recorded when only the leftover tests of an earlier
    subtask happen to be included.

    Returns ``(own_sets, dependencies)``; ``dependencies[i]`` holds 1-based
    subtask numbers in increasing order.
    """
    own_sets = []
    dependencies = []
    for index, tests in enumerate(full_sets):
        included = [j for j in range(index) if full_sets[j] <= tests]
        covered = set()
        for j in included:
            covered |= full_sets[j]
        own_sets.append(tests - covered)
        dependencies.append([j + 1 for j in included])
    return own_sets, dependencies


def build_problem_conf(time_limit, memory_limit, subtask_end, subtask_score,
                       dependencies):
    """Return the text of ``problem.conf`` for one problem.

    The three lists are parallel, one entry per subtask: index of the last
    test, score, and the 1-based numbers of the subtasks it depends on.
    The key names look like the UOJ judge configuration format - TODO confirm.
    """
    n_tests = subtask_end[-1] if subtask_end else 0
    lines = [
        'use_builtin_judger on',
        'use_builtin_checker wcmp',
        'n_tests ' + str(n_tests),
        'n_ex_tests 0',
        'n_sample_tests 0',
        'input_pre data',
        'input_suf in',
        'output_pre data',
        'output_suf out',
        'time_limit ' + str(time_limit),
        'memory_limit ' + str(memory_limit),
        'n_subtasks ' + str(len(subtask_end)),
    ]
    for number, end in enumerate(subtask_end, start=1):
        lines.append('subtask_end_' + str(number) + ' ' + str(end))
        lines.append('subtask_score_' + str(number) + ' '
                     + str(subtask_score[number - 1]))
        deps = dependencies[number - 1]
        if deps:
            lines.append('subtask_dependence_' + str(number) + ' many')
            for cnt, dep in enumerate(deps, start=1):
                lines.append('subtask_dependence_' + str(number) + '_'
                             + str(cnt) + ' ' + str(dep))
    return '\n'.join(lines) + '\n'


def process_problem(problem_name, score, archive_idx):
    """Convert the problem whose folder is the current working directory.

    Reads the limits and subtasks of ``problem_name`` from ``score``, copies
    the tests from ``in/`` and ``out/`` to ``data<N>.in`` / ``data<N>.out``,
    writes ``problem.conf`` and packs every file whose name contains a dot
    into ``<root>/<archive_idx>.zip``.
    """
    problem_position = score.find(problem_name)

    # Limits: taken from the score file when present, otherwise asked for.
    time_limit = read_limit(score, 'time_limit', problem_position)
    if time_limit is None:
        time_limit = int(input('无法获取 ' + problem_name + ' 题目的时间限制,请手动填写:'))
    memory_limit = read_limit(score, 'memory_limit', problem_position)
    if memory_limit is None:
        memory_limit = int(input('无法获取 ' + problem_name + ' 题目的空间限制,请手动填写:'))

    subtask_score, patterns = parse_subtasks(score, problem_position)
    full_sets = [expand_test_names(names) for names in patterns]
    own_sets, dependencies = resolve_dependencies(full_sets)

    # Generate the renumbered data files; sorting keeps the numbering stable
    # from one run to the next.
    here = os.getcwd()
    test_count = 0
    subtask_end = []
    for tests in own_sets:
        for data in sorted(tests):
            test_count += 1
            shutil.copyfile(here + '/in/' + data + '.txt',
                            here + '/data' + str(test_count) + '.in')
            shutil.copyfile(here + '/out/' + data + '.txt',
                            here + '/data' + str(test_count) + '.out')
        subtask_end.append(test_count)

    with open(here + '/problem.conf', 'w') as conf:
        conf.write(build_problem_conf(time_limit, memory_limit, subtask_end,
                                      subtask_score, dependencies))

    with ZipFile(root + '/' + str(archive_idx) + '.zip', 'w') as archive:
        for entry in os.listdir():
            # Names with a dot are the generated files; the in/ and out/
            # folders have none and are left out.
            if entry.find('.') != -1:
                archive.write(here + '/' + entry, arcname=basename(entry))


def main():
    """Convert every problem of the 2014-2024 contests found below ``root``.

    Expects the current working directory to be ``root``.
    """
    archive_idx = problem_idx

    for year in range(2014, 2025):
        # Enter the folder of this year.
        os.chdir('JOI' + str(year))
        # Before 2020: step into the extra folder next to data.zip.
        if year < 2020:
            os.chdir(next(entry for entry in os.listdir()
                          if entry != 'data.zip'))

        year_catalogue = os.getcwd()

        # Fetch the score.txt file of this year.
        url = 'https://www2.ioi-jp.org/joi/' + str(year - 1) + '/'
        url += str(year) + '-ho/' + str(year) + '-ho-score.txt'
        score = requests.get(url).text

        # Handle the problems one by one, in a deterministic order.
        for problem_name in sorted(os.listdir()):
            # Skip data.zip (present from 2020 on) and any other file.
            if problem_name.find('.') != -1:
                continue
            print('处理' + problem_name + '中 ...')

            os.chdir(problem_name)
            archive_idx += 1
            process_problem(problem_name, score, archive_idx)
            os.chdir(year_catalogue)

        # Done with this year, go back to the base directory.
        os.chdir(root)


# Guarded so that the helpers can be imported without touching the network
# or the file system; running the file as a script behaves as before.
if __name__ == "__main__":
    main()

【通知】WyOJ Round 1 公告

2025-04-14 14:52:45 By Pigsyy

比赛描述

WyOJ Round 1 将于 $2025$ 年 $4$ 月 $14$ 日 $14:00$ 举行!比赛将进行 $4$ 个小时,共四道题。

这是 WyOJ 的第一场 WyOJ Round,难度大致介于 $\tt\color{orange}{普及-/提高}$ 到 $\tt\color{purple}{省选/NOI-}$,欢迎大家来玩!

比赛链接:https://oj.ryp.org.cn/contest/3

奖金

本场比赛共设置了 $75$ 元的奖金:

  • 排名奖:本场比赛排名 $1,2,3,4,5,6,7,8$ 的选手分别获得 $20,15,10,5,2,1,1,1$ 元。

  • 一血奖:首位通过第 $1,2,3,4$ 道题目的选手分别获得 $1,3,6,10$ 元。

获奖名单将在赛后总结帖中公布,请获奖者在比赛结束后 $72$ 小时内联系 Pigsyy 领奖,过期作废。

注意:若想赢得奖金,需在洛谷提交。

推荐在洛谷和 WyOJ 同时提交。

共 2 篇博客