LICENSE
README.md
pyproject.toml
cookbooks/agentic_grader/01_native_react_native_tool.py
cookbooks/agentic_grader/02_native_react_langchain_tool.py
cookbooks/agentic_grader/03_langchain_agent.py
cookbooks/agentic_grader/04_agentscope_agent.py
cookbooks/agentic_grader/adapters/agentscope.py
cookbooks/agentic_grader/adapters/langchain.py
cookbooks/auto_arena/__main__.py
cookbooks/auto_arena/auto_arena_pipeline.py
cookbooks/auto_arena/chart_generator.py
cookbooks/auto_arena/query_generator.py
cookbooks/auto_arena/report_generator.py
cookbooks/auto_arena/response_collector.py
cookbooks/auto_arena/schema.py
cookbooks/data_refinement/refinement.py
cookbooks/finance_grader/event_interpretation/event_analysis.py
cookbooks/finance_grader/event_interpretation/event_identification.py
cookbooks/finance_grader/industry_research/characteristics_analysis.py
cookbooks/finance_grader/industry_research/risk_analysis.py
cookbooks/finance_grader/industry_research/underlying_comparison.py
cookbooks/finance_grader/macro_analysis/concept_explanation.py
cookbooks/finance_grader/macro_analysis/macro_analysis.py
cookbooks/finance_grader/stock_analysis/fundamental_analysis.py
cookbooks/finance_grader/stock_analysis/overall_logic.py
cookbooks/finance_grader/stock_analysis/stock_risk_analysis.py
cookbooks/finance_grader/stock_analysis/valuation_analysis.py
cookbooks/finance_grader/stock_search/search_integrity.py
cookbooks/finance_grader/stock_search/search_relevance.py
cookbooks/finance_grader/stock_search/search_timeliness.py
cookbooks/grader_validation/accuracy.py
cookbooks/grader_validation/grader_validator.py
cookbooks/grader_validation/rewardbench2.py
cookbooks/integrations/langsmith.py
cookbooks/multi_turn_dialogue/multi_turn_evaluation.py
cookbooks/pairwise_evaluation/pairwise_evaluation.py
cookbooks/paper_review/__init__.py
cookbooks/paper_review/models.py
cookbooks/paper_review/pipeline.py
cookbooks/paper_review/report.py
cookbooks/paper_review/schema.py
cookbooks/paper_review/utils.py
cookbooks/paper_review/disciplines/__init__.py
cookbooks/paper_review/disciplines/base.py
cookbooks/paper_review/disciplines/biology.py
cookbooks/paper_review/disciplines/chemistry.py
cookbooks/paper_review/disciplines/cs.py
cookbooks/paper_review/disciplines/economics.py
cookbooks/paper_review/disciplines/environmental_science.py
cookbooks/paper_review/disciplines/mathematics.py
cookbooks/paper_review/disciplines/medicine.py
cookbooks/paper_review/disciplines/physics.py
cookbooks/paper_review/disciplines/psychology.py
cookbooks/paper_review/disciplines/social_sciences.py
cookbooks/paper_review/examples/__init__.py
cookbooks/paper_review/examples/bib_verification.py
cookbooks/paper_review/examples/correctness_check.py
cookbooks/paper_review/examples/single_paper_review.py
cookbooks/paper_review/examples/tex_package_review.py
cookbooks/paper_review/graders/__init__.py
cookbooks/paper_review/graders/correctness.py
cookbooks/paper_review/graders/criticality.py
cookbooks/paper_review/graders/format.py
cookbooks/paper_review/graders/jailbreaking.py
cookbooks/paper_review/graders/review.py
cookbooks/paper_review/processors/__init__.py
cookbooks/paper_review/processors/bib_checker.py
cookbooks/paper_review/processors/tex_processor.py
cookbooks/paper_review/prompts/__init__.py
cookbooks/paper_review/prompts/correctness.py
cookbooks/paper_review/prompts/criticality.py
cookbooks/paper_review/prompts/format.py
cookbooks/paper_review/prompts/jailbreaking.py
cookbooks/paper_review/prompts/review.py
cookbooks/ref_hallucination_arena/__main__.py
cookbooks/ref_hallucination_arena/pipeline.py
cookbooks/ref_hallucination_arena/schema.py
cookbooks/ref_hallucination_arena/collectors/__init__.py
cookbooks/ref_hallucination_arena/collectors/bib_extractor.py
cookbooks/ref_hallucination_arena/collectors/response_collector.py
cookbooks/ref_hallucination_arena/loaders/__init__.py
cookbooks/ref_hallucination_arena/loaders/dataset_loader.py
cookbooks/ref_hallucination_arena/reporting/__init__.py
cookbooks/ref_hallucination_arena/reporting/chart_generator.py
cookbooks/ref_hallucination_arena/reporting/report_generator.py
cookbooks/ref_hallucination_arena/scoring/__init__.py
cookbooks/ref_hallucination_arena/scoring/objective_scorer.py
cookbooks/ref_hallucination_arena/scoring/ranking.py
cookbooks/ref_hallucination_arena/verifiers/__init__.py
cookbooks/ref_hallucination_arena/verifiers/arxiv_verifier.py
cookbooks/ref_hallucination_arena/verifiers/base_verifier.py
cookbooks/ref_hallucination_arena/verifiers/composite_verifier.py
cookbooks/ref_hallucination_arena/verifiers/crossref_verifier.py
cookbooks/ref_hallucination_arena/verifiers/dblp_verifier.py
cookbooks/ref_hallucination_arena/verifiers/pubmed_verifier.py
cookbooks/training_judge_model/bradley-terry/dataset.py
cookbooks/training_judge_model/bradley-terry/trainer.py
cookbooks/training_judge_model/grpo/chat_rl_dataset.py
cookbooks/training_judge_model/grpo/grader_rl_dataset.py
cookbooks/training_judge_model/grpo/pairwise/reward_fn.py
cookbooks/training_judge_model/grpo/pointwise/grader_reward_fn.py
cookbooks/training_judge_model/grpo/pointwise/reward_fn.py
cookbooks/training_judge_model/grpo/pointwise/utils/preprocess_grader_data.py
experiments/run_grader_evaluations.py
openjudge/__init__.py
openjudge/agentic/__init__.py
openjudge/agentic/agents.py
openjudge/agentic/tools.py
openjudge/agentic/adapters/__init__.py
openjudge/agentic/adapters/function.py
openjudge/analyzer/__init__.py
openjudge/analyzer/base_analyzer.py
openjudge/analyzer/pairwise_analyzer.py
openjudge/analyzer/statistical/__init__.py
openjudge/analyzer/statistical/consistency_analyzer.py
openjudge/analyzer/statistical/distribution_analyzer.py
openjudge/analyzer/validation/__init__.py
openjudge/analyzer/validation/accuracy_analyzer.py
openjudge/analyzer/validation/base_validation_analyzer.py
openjudge/analyzer/validation/correlation_analyzer.py
openjudge/analyzer/validation/f1_score_analyzer.py
openjudge/analyzer/validation/false_negative_analyzer.py
openjudge/analyzer/validation/false_positive_analyzer.py
openjudge/analyzer/validation/precision_analyzer.py
openjudge/analyzer/validation/recall_analyzer.py
openjudge/evaluation_strategy/__init__.py
openjudge/evaluation_strategy/average_evaluation_strategy.py
openjudge/evaluation_strategy/base_evaluation_strategy.py
openjudge/evaluation_strategy/direct_evaluation_strategy.py
openjudge/evaluation_strategy/voting_evaluation_strategy.py
openjudge/generator/__init__.py
openjudge/generator/base_generator.py
openjudge/generator/llm_grader_generator.py
openjudge/generator/iterative_rubric/__init__.py
openjudge/generator/iterative_rubric/categorizer.py
openjudge/generator/iterative_rubric/generator.py
openjudge/generator/iterative_rubric/mcr_selector.py
openjudge/generator/iterative_rubric/query_rubric_generator.py
openjudge/generator/simple_rubric/__init__.py
openjudge/generator/simple_rubric/generator.py
openjudge/generator/simple_rubric/rubric_generator.py
openjudge/graders/__init__.py
openjudge/graders/agentic_grader.py
openjudge/graders/base_grader.py
openjudge/graders/function_grader.py
openjudge/graders/llm_grader.py
openjudge/graders/schema.py
openjudge/graders/agent/__init__.py
openjudge/graders/agent/utils.py
openjudge/graders/agent/action/__init__.py
openjudge/graders/agent/action/action_alignment.py
openjudge/graders/agent/action/action_loop.py
openjudge/graders/agent/memory/__init__.py
openjudge/graders/agent/memory/memory_accuracy.py
openjudge/graders/agent/memory/memory_detail_preservation.py
openjudge/graders/agent/memory/memory_retrieval_effectiveness.py
openjudge/graders/agent/observation/__init__.py
openjudge/graders/agent/observation/observation_information_gain.py
openjudge/graders/agent/plan/__init__.py
openjudge/graders/agent/plan/plan_feasibility.py
openjudge/graders/agent/reflection/__init__.py
openjudge/graders/agent/reflection/reflection_accuracy.py
openjudge/graders/agent/reflection/reflection_outcome_understanding.py
openjudge/graders/agent/reflection/reflection_progress_awareness.py
openjudge/graders/agent/tool/__init__.py
openjudge/graders/agent/tool/tool_call_accuracy.py
openjudge/graders/agent/tool/tool_call_precision_recall_match.py
openjudge/graders/agent/tool/tool_call_step_sequence_match.py
openjudge/graders/agent/tool/tool_call_success.py
openjudge/graders/agent/tool/tool_parameter_check.py
openjudge/graders/agent/tool/tool_selection.py
openjudge/graders/agent/trajectory/__init__.py
openjudge/graders/agent/trajectory/trajectory_accuracy.py
openjudge/graders/agent/trajectory/trajectory_comprehensive.py
openjudge/graders/code/__init__.py
openjudge/graders/code/code_execution.py
openjudge/graders/code/code_style.py
openjudge/graders/code/patch_similarity.py
openjudge/graders/code/syntax_checker.py
openjudge/graders/code/_utils/__init__.py
openjudge/graders/code/_utils/testing_util.py
openjudge/graders/code/_utils/utils.py
openjudge/graders/common/__init__.py
openjudge/graders/common/correctness.py
openjudge/graders/common/hallucination.py
openjudge/graders/common/harmfulness.py
openjudge/graders/common/instruction_following.py
openjudge/graders/common/relevance.py
openjudge/graders/common/search_correctness.py
openjudge/graders/format/__init__.py
openjudge/graders/format/length_penalty.py
openjudge/graders/format/ngram_repetition_penalty.py
openjudge/graders/format/reasoning_format.py
openjudge/graders/format/reasoning_tool_format.py
openjudge/graders/format/json/__init__.py
openjudge/graders/format/json/json_match.py
openjudge/graders/format/json/json_validator.py
openjudge/graders/math/__init__.py
openjudge/graders/math/math_expression_verify.py
openjudge/graders/multi_turn/__init__.py
openjudge/graders/multi_turn/anaphora_resolution_grader.py
openjudge/graders/multi_turn/context_memory_grader.py
openjudge/graders/multi_turn/instruction_clarification_grader.py
openjudge/graders/multi_turn/proactive_interaction_grader.py
openjudge/graders/multi_turn/response_repetition_grader.py
openjudge/graders/multi_turn/self_correction_grader.py
openjudge/graders/multi_turn/topic_switch_grader.py
openjudge/graders/multimodal/__init__.py
openjudge/graders/multimodal/image_coherence.py
openjudge/graders/multimodal/image_helpfulness.py
openjudge/graders/multimodal/text_to_image.py
openjudge/graders/multimodal/_internal/__init__.py
openjudge/graders/multimodal/_internal/context_utils.py
openjudge/graders/multimodal/_internal/criteria_utils.py
openjudge/graders/multimodal/_internal/schema.py
openjudge/graders/text/__init__.py
openjudge/graders/text/number_accuracy.py
openjudge/graders/text/similarity.py
openjudge/graders/text/string_match.py
openjudge/graders/text/_utils/__init__.py
openjudge/graders/text/_utils/compute.py
openjudge/graders/text/_utils/normalization.py
openjudge/graders/text/_utils/setup_nltk_data.py
openjudge/graders/text/_utils/string_match_compute.py
openjudge/graders/text/_utils/tokenization.py
openjudge/models/__init__.py
openjudge/models/base_chat_model.py
openjudge/models/openai_chat_model.py
openjudge/models/qwen_vl_model.py
openjudge/models/formatter/__init__.py
openjudge/models/formatter/base_formatter.py
openjudge/models/formatter/dashscope_formatter.py
openjudge/models/schema/__init__.py
openjudge/models/schema/prompt_template.py
openjudge/models/schema/oai/__init__.py
openjudge/models/schema/oai/message.py
openjudge/models/schema/oai/response.py
openjudge/models/schema/qwen/__init__.py
openjudge/models/schema/qwen/mllmImage.py
openjudge/runner/__init__.py
openjudge/runner/base_runner.py
openjudge/runner/grading_runner.py
openjudge/runner/aggregator/__init__.py
openjudge/runner/aggregator/base_aggregator.py
openjudge/runner/aggregator/weighted_sum_aggregator.py
openjudge/runner/resource_executor/__init__.py
openjudge/runner/resource_executor/base_resource_executor.py
openjudge/runner/resource_executor/semaphore_resource_executor.py
openjudge/utils/__init__.py
openjudge/utils/concurrency.py
openjudge/utils/grader_info.py
openjudge/utils/instance.py
openjudge/utils/mapping.py
openjudge/utils/prompt_format_checker.py
openjudge/utils/tokenizer.py
openjudge/utils/utils.py
py_openjudge.egg-info/PKG-INFO
py_openjudge.egg-info/SOURCES.txt
py_openjudge.egg-info/dependency_links.txt
py_openjudge.egg-info/requires.txt
py_openjudge.egg-info/top_level.txt
skills/paper-review/scripts/review_paper.py
skills/paper-review/scripts/review_tex.py
tests/analyzer/statistical/test_distribution_analyzer.py
tests/analyzer/validation/test_accuracy_analyzer.py
tests/analyzer/validation/test_consistency_analyzer.py
tests/analyzer/validation/test_correlation_analyzer.py
tests/analyzer/validation/test_f1_score_analyzer.py
tests/analyzer/validation/test_false_negative_analyzer.py
tests/analyzer/validation/test_false_positive_analyzer.py
tests/analyzer/validation/test_precision_analyzer.py
tests/analyzer/validation/test_recall_analyzer.py
tests/benchmarks/test_rewardbench2.py
tests/data/run_grader.py
tests/data/run_grader_eval_bfcl_dataset.py
tests/data/utils/tool_call/generate_bfcl_tool_call_data.py
tests/data/utils/tool_call/generate_new_cases.py
tests/data/utils/tool_call/llm_select_tools.py
tests/data/utils/tool_call/process_bfcl_tool_call_data.py
tests/docs/test_building_graders_custom.py
tests/docs/test_building_graders_overview.py
tests/evaluation_strategy/test_average_evaluation_strategy.py
tests/evaluation_strategy/test_direct_evaluation_strategy.py
tests/evaluation_strategy/test_voting_evaluation_strategy.py
tests/generator/test_iterative_rubric.py
tests/generator/test_simple_rubric.py
tests/graders/test_base_grader.py
tests/graders/test_llm_grader.py
tests/graders/agent/action/test_action_alignment.py
tests/graders/agent/action/test_action_loop.py
tests/graders/agent/memory/test_memory_accuracy.py
tests/graders/agent/memory/test_memory_detail_preservation.py
tests/graders/agent/memory/test_memory_retrieval_effectiveness.py
tests/graders/agent/observation/test_observation_information_gain.py
tests/graders/agent/plan/test_plan_feasibility.py
tests/graders/agent/reflection/test_reflection_accuracy.py
tests/graders/agent/reflection/test_reflection_outcome_understanding.py
tests/graders/agent/reflection/test_reflection_progress_awareness.py
tests/graders/agent/tool/test_tool_call_accuracy.py
tests/graders/agent/tool/test_tool_call_precision_recall_match.py
tests/graders/agent/tool/test_tool_call_step_sequence_match.py
tests/graders/agent/tool/test_tool_call_success.py
tests/graders/agent/tool/test_tool_parameter_check.py
tests/graders/agent/tool/test_tool_selection.py
tests/graders/agent/trajectory/test_trajectory_accuracy.py
tests/graders/agent/trajectory/test_trajectory_comprehensive.py
tests/graders/common/test_correctness.py
tests/graders/common/test_function_grader.py
tests/graders/common/test_hallucination.py
tests/graders/common/test_harmfulness.py
tests/graders/common/test_instruction_following.py
tests/graders/common/test_relevance.py
tests/graders/common/test_search_correctness.py
tests/graders/format/test_json_match.py
tests/graders/format/test_json_validator.py
tests/graders/multi_turn/test_anaphora_resolution.py
tests/graders/multi_turn/test_context_memory.py
tests/graders/multi_turn/test_instruction_clarification.py
tests/graders/multi_turn/test_proactive_interaction.py
tests/graders/multi_turn/test_response_repetition.py
tests/graders/multi_turn/test_self_correction.py
tests/graders/multi_turn/test_topic_switch.py
tests/graders/multimodal/test_image_coherence.py
tests/graders/multimodal/test_image_helpfulness.py
tests/graders/multimodal/test_text_to_image.py
tests/graders/text/similarity/__init__.py
tests/graders/text/similarity/test_bleu.py
tests/graders/text/similarity/test_f1_score.py
tests/graders/text/similarity/test_fuzzy_match.py
tests/graders/text/similarity/test_rouge.py
tests/graders/text/string/test_string_match.py
tests/models/test_openai_chat_model.py
tests/models/schema/test_prompt_template.py
tests/runner/test_grading_runner.py
tests/runner/aggregator/test_weighted_sum_aggregator.py
tests/utils/test_grader_info.py
tests/utils/test_mapping.py
ui/app.py
ui/core/__init__.py
ui/core/base_feature.py
ui/core/feature_registry.py
ui/core/navigation.py
ui/core/session_manager.py
ui/core/task_manager.py
ui/features/__init__.py
ui/features/auto_arena/__init__.py
ui/features/auto_arena/feature.py
ui/features/auto_arena/components/__init__.py
ui/features/auto_arena/components/config_panel.py
ui/features/auto_arena/components/history_panel.py
ui/features/auto_arena/components/preset_panel.py
ui/features/auto_arena/components/progress_panel.py
ui/features/auto_arena/components/report_viewer.py
ui/features/auto_arena/components/result_panel.py
ui/features/auto_arena/components/sidebar.py
ui/features/auto_arena/services/__init__.py
ui/features/auto_arena/services/history_manager.py
ui/features/auto_arena/services/pipeline_runner.py
ui/features/auto_arena/services/preset_manager.py
ui/features/auto_rubric/__init__.py
ui/features/auto_rubric/feature.py
ui/features/auto_rubric/components/__init__.py
ui/features/auto_rubric/components/data_upload_panel.py
ui/features/auto_rubric/components/history_panel.py
ui/features/auto_rubric/components/iterative_config_panel.py
ui/features/auto_rubric/components/result_panel.py
ui/features/auto_rubric/components/rubric_tester.py
ui/features/auto_rubric/components/sidebar.py
ui/features/auto_rubric/components/simple_config_panel.py
ui/features/auto_rubric/services/__init__.py
ui/features/auto_rubric/services/data_parser.py
ui/features/auto_rubric/services/export_service.py
ui/features/auto_rubric/services/history_manager.py
ui/features/auto_rubric/services/rubric_generator_service.py
ui/features/grader/__init__.py
ui/features/grader/feature.py
ui/features/grader/components/__init__.py
ui/features/grader/components/input_panel.py
ui/features/grader/components/multimodal.py
ui/features/grader/components/result_panel.py
ui/features/grader/components/sidebar.py
ui/features/grader/components/batch/__init__.py
ui/features/grader/components/batch/batch_history_panel.py
ui/features/grader/components/batch/batch_progress_panel.py
ui/features/grader/components/batch/batch_result_panel.py
ui/features/grader/components/batch/upload_panel.py
ui/features/grader/config/__init__.py
ui/features/grader/config/constants.py
ui/features/grader/config/grader_registry.py
ui/features/grader/services/__init__.py
ui/features/grader/services/batch_history_manager.py
ui/features/grader/services/batch_runner.py
ui/features/grader/services/file_parser.py
ui/features/grader/services/grader_factory.py
ui/features/grader/services/single_evaluation_logger.py
ui/features/paper_review/__init__.py
ui/features/paper_review/feature.py
ui/features/paper_review/components/__init__.py
ui/features/paper_review/components/batch_panel.py
ui/features/paper_review/components/history_panel.py
ui/features/paper_review/components/progress_panel.py
ui/features/paper_review/components/result_panel.py
ui/features/paper_review/services/__init__.py
ui/features/paper_review/services/batch_runner.py
ui/features/paper_review/services/history_service.py
ui/features/paper_review/services/pipeline_runner.py
ui/shared/__init__.py
ui/shared/constants.py
ui/shared/components/__init__.py
ui/shared/components/common.py
ui/shared/components/logo.py
ui/shared/components/workspace_selector.py
ui/shared/i18n/__init__.py
ui/shared/i18n/core.py
ui/shared/i18n/translations/__init__.py
ui/shared/i18n/translations/auto_arena.py
ui/shared/i18n/translations/auto_rubric.py
ui/shared/i18n/translations/common.py
ui/shared/i18n/translations/grader.py
ui/shared/i18n/translations/paper_review.py
ui/shared/services/__init__.py
ui/shared/services/model_factory.py
ui/shared/services/workspace_manager.py
ui/shared/styles/__init__.py
ui/shared/styles/theme.py
ui/shared/utils/__init__.py
ui/shared/utils/helpers.py