From 0bee0c799ba3c180367c69303711aa3b3f931285 Mon Sep 17 00:00:00 2001 From: ale Date: Sat, 23 Aug 2025 13:13:15 +0200 Subject: [PATCH] fmt errors Signed-off-by: ale --- benches/compilation_benchmark.rs | 16 +- src/codegen.rs | 385 ++++++++++++++++++++----------- src/compiler.rs | 123 ++++++---- src/lexer.rs | 162 +++++++++---- src/lib.rs | 12 +- src/linker.rs | 23 +- src/main.rs | 26 +-- src/optimizer.rs | 32 ++- src/parser.rs | 313 ++++++++++++++++--------- src/targets.rs | 25 +- tests/integration_tests.rs | 32 +-- 11 files changed, 736 insertions(+), 413 deletions(-) diff --git a/benches/compilation_benchmark.rs b/benches/compilation_benchmark.rs index a6ee553..1b11699 100644 --- a/benches/compilation_benchmark.rs +++ b/benches/compilation_benchmark.rs @@ -1,9 +1,9 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use alecc::lexer::Lexer; -use alecc::parser::Parser; use alecc::codegen::CodeGenerator; -use alecc::optimizer::{Optimizer, OptimizationLevel}; +use alecc::lexer::Lexer; +use alecc::optimizer::{OptimizationLevel, Optimizer}; +use alecc::parser::Parser; use alecc::targets::Target; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; const SIMPLE_C_CODE: &str = r#" int main() { @@ -119,5 +119,11 @@ fn bench_optimizer(c: &mut Criterion) { }); } -criterion_group!(benches, bench_lexer, bench_parser, bench_codegen, bench_optimizer); +criterion_group!( + benches, + bench_lexer, + bench_parser, + bench_codegen, + bench_optimizer +); criterion_main!(benches); diff --git a/src/codegen.rs b/src/codegen.rs index f31609c..34c79b9 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,6 +1,8 @@ -use crate::parser::{Program, Function, Expression, Statement, Type, BinaryOperator, UnaryOperator}; -use crate::targets::Target; use crate::error::{AleccError, Result}; +use crate::parser::{ + BinaryOperator, Expression, Function, Program, Statement, Type, UnaryOperator, +}; +use crate::targets::Target; use std::collections::HashMap; pub struct CodeGenerator { @@ -11,8 +13,8 @@ pub struct CodeGenerator { current_function_params: Vec<(String, i32)>, // (name, stack_offset) epilogue_emitted: bool, local_variables: HashMap, // (name, stack_offset) - stack_offset: i32, // Current stack offset for local variables - last_call_stack_cleanup: usize, // Stack bytes to clean up after last call + stack_offset: i32, // Current stack offset for local variables + last_call_stack_cleanup: usize, // Stack bytes to clean up after last call } impl CodeGenerator { @@ -35,9 +37,9 @@ impl CodeGenerator { for function in &program.functions { self.collect_string_literals_from_statement(&function.body)?; } - + self.emit_header(); - + // Generate string literals section if !self.string_literals.is_empty() { self.emit_line(".section .rodata"); @@ -74,23 +76,23 @@ impl CodeGenerator { self.emit_line(""); self.emit_line(".globl _start"); self.emit_line("_start:"); - + // Set up stack and call main self.emit_line(" push rbp"); self.emit_line(" mov rbp, rsp"); - + // Reserve space for temporary operations (ensures proper stack alignment) // 120 bytes = 15*8, so after rbp push (8 bytes), total is 128 bytes = multiple of 16 self.emit_line(" sub rsp, 120"); - + // Call main function self.emit_line(" call main"); - + // Exit syscall with main's return value - self.emit_line(" mov rdi, rax"); // exit status = main's return value - self.emit_line(" mov rax, 60"); // sys_exit syscall number - self.emit_line(" syscall"); // invoke syscall - + self.emit_line(" mov rdi, rax"); // exit status = main's return value + self.emit_line(" mov rax, 60"); // sys_exit syscall number + self.emit_line(" syscall"); // invoke syscall + Ok(()) } @@ -122,27 +124,27 @@ impl CodeGenerator { // This is a function definition, generate the actual function } } - + self.emit_line(&format!(".globl {}", function.name)); self.emit_line(&format!("{}:", function.name)); - + // Set up parameter tracking self.current_function_params.clear(); self.local_variables.clear(); // Start local variables after parameters to avoid collision self.stack_offset = -(function.parameters.len() as i32 * 8); self.epilogue_emitted = false; - + // Function prologue self.emit_function_prologue(&function.parameters)?; - + // Function body self.generate_statement(&function.body)?; - + // Function epilogue (always ensure we have a proper function ending) // This handles cases where there might not be explicit returns in all paths self.emit_function_epilogue()?; - + self.emit_line(""); Ok(()) } @@ -152,59 +154,64 @@ impl CodeGenerator { Target::I386 => { self.emit_line(" push ebp"); self.emit_line(" mov ebp, esp"); - + // Reserve space for parameters only (no extra temporaries for now) let stack_space = parameters.len() * 4; if stack_space > 0 { self.emit_line(&format!(" sub esp, {}", stack_space)); } - + // Store parameters from stack (i386 calling convention) for (i, (name, _)) in parameters.iter().enumerate() { let param_offset = -(i as i32 + 1) * 4; let stack_offset = 8 + i as i32 * 4; // ebp + 8 + offset self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", stack_offset)); self.emit_line(&format!(" mov DWORD PTR [ebp + {}], eax", param_offset)); - self.current_function_params.push((name.clone(), param_offset)); + self.current_function_params + .push((name.clone(), param_offset)); } } Target::Amd64 => { self.emit_line(" push rbp"); self.emit_line(" mov rbp, rsp"); - + // Reserve space for parameters + ensure 16-byte alignment let stack_space = parameters.len() * 8; // Always reserve at least 8 bytes to maintain 16-byte alignment after rbp push let min_space = if stack_space == 0 { 8 } else { stack_space }; let aligned_space = ((min_space + 15) / 16) * 16; // Round up to 16-byte boundary self.emit_line(&format!(" sub rsp, {}", aligned_space)); - + // Store parameters from registers (x86_64 calling convention) let param_registers = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]; for (i, (name, _)) in parameters.iter().enumerate() { let param_offset = -(i as i32 + 1) * 8; if i < param_registers.len() { // Parameter passed in register - self.emit_line(&format!(" mov QWORD PTR [rbp + {}], {}", param_offset, param_registers[i])); + self.emit_line(&format!( + " mov QWORD PTR [rbp + {}], {}", + param_offset, param_registers[i] + )); } else { // Parameter passed on stack let stack_offset = 16 + (i - param_registers.len()) as i32 * 8; self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", stack_offset)); self.emit_line(&format!(" mov QWORD PTR [rbp + {}], rax", param_offset)); } - self.current_function_params.push((name.clone(), param_offset)); + self.current_function_params + .push((name.clone(), param_offset)); } } Target::Arm64 => { self.emit_line(" stp x29, x30, [sp, #-16]!"); self.emit_line(" mov x29, sp"); - + let stack_space = parameters.len() * 8; if stack_space > 0 { let aligned_space = (stack_space + 15) & !15; // 16-byte aligned self.emit_line(&format!(" sub sp, sp, #{}", aligned_space)); } - + // Store parameters from registers (ARM64 calling convention) for (i, (name, _)) in parameters.iter().enumerate() { let param_offset = -(i as i32 + 1) * 8; @@ -217,7 +224,8 @@ impl CodeGenerator { self.emit_line(&format!(" ldr x9, [x29, #{}]", stack_offset)); self.emit_line(&format!(" str x9, [x29, #{}]", param_offset)); } - self.current_function_params.push((name.clone(), param_offset)); + self.current_function_params + .push((name.clone(), param_offset)); } } } @@ -228,7 +236,7 @@ impl CodeGenerator { if self.epilogue_emitted { return Ok(()); // Don't emit duplicate epilogues } - + match self.target { Target::I386 => { self.emit_line(" mov esp, ebp"); @@ -246,7 +254,7 @@ impl CodeGenerator { self.emit_line(" ret"); } } - + self.epilogue_emitted = true; Ok(()) } @@ -270,7 +278,7 @@ impl CodeGenerator { self.emit_line(" ret"); } } - + self.epilogue_emitted = true; Ok(()) } @@ -280,30 +288,40 @@ impl CodeGenerator { Statement::Expression(expr) => { self.generate_expression(expr)?; } - Statement::Declaration { name, var_type, initializer } => { + Statement::Declaration { + name, + var_type, + initializer, + } => { // Calculate space needed based on type let size = match var_type { Type::Array(_, Some(length)) => length * 8, // Assuming 8-byte elements - Type::Array(_, None) => 80, // Default size for unsized arrays - _ => 8, // Default 8 bytes for simple types + Type::Array(_, None) => 80, // Default size for unsized arrays + _ => 8, // Default 8 bytes for simple types }; - + // Allocate space for variable/array self.stack_offset -= size as i32; let var_offset = self.stack_offset; - + // Store variable name and offset for later reference self.local_variables.insert(name.clone(), var_offset); - + if let Some(init_expr) = initializer { self.generate_expression(init_expr)?; // Store the value in the local variable slot match self.target { Target::Amd64 => { - self.emit_line(&format!(" mov QWORD PTR [rbp + {}], rax", var_offset)); + self.emit_line(&format!( + " mov QWORD PTR [rbp + {}], rax", + var_offset + )); } Target::I386 => { - self.emit_line(&format!(" mov DWORD PTR [ebp + {}], eax", var_offset)); + self.emit_line(&format!( + " mov DWORD PTR [ebp + {}], eax", + var_offset + )); } Target::Arm64 => { self.emit_line(&format!(" str x0, [x29, #{}]", var_offset)); @@ -335,16 +353,20 @@ impl CodeGenerator { self.generate_statement(stmt)?; } } - Statement::If { condition, then_stmt, else_stmt } => { + Statement::If { + condition, + then_stmt, + else_stmt, + } => { let else_label = self.new_label("else"); let end_label = self.new_label("endif"); - + self.generate_expression(condition)?; self.emit_conditional_jump(false, &else_label)?; - + self.generate_statement(then_stmt)?; self.emit_jump(&end_label)?; - + self.emit_line(&format!("{}:", else_label)); if let Some(else_stmt) = else_stmt { // Reset epilogue flag for else branch in case it contains a return @@ -356,47 +378,52 @@ impl CodeGenerator { self.epilogue_emitted = saved_epilogue_state; } } - + self.emit_line(&format!("{}:", end_label)); } Statement::While { condition, body } => { let loop_label = self.new_label("loop"); let end_label = self.new_label("endloop"); - + self.emit_line(&format!("{}:", loop_label)); self.generate_expression(condition)?; self.emit_conditional_jump(false, &end_label)?; - + self.generate_statement(body)?; self.emit_jump(&loop_label)?; - + self.emit_line(&format!("{}:", end_label)); } - Statement::For { init, condition, increment, body } => { + Statement::For { + init, + condition, + increment, + body, + } => { // Generate initialization if let Some(init_stmt) = init { self.generate_statement(init_stmt)?; } - + let loop_label = self.new_label("forloop"); let end_label = self.new_label("endfor"); - + self.emit_line(&format!("{}:", loop_label)); - + // Generate condition check if let Some(cond_expr) = condition { self.generate_expression(cond_expr)?; self.emit_conditional_jump(false, &end_label)?; } - + // Generate body self.generate_statement(body)?; - + // Generate increment if let Some(inc_expr) = increment { self.generate_expression(inc_expr)?; } - + self.emit_jump(&loop_label)?; self.emit_line(&format!("{}:", end_label)); } @@ -412,19 +439,17 @@ impl CodeGenerator { fn generate_expression(&mut self, expression: &Expression) -> Result<()> { match expression { - Expression::IntegerLiteral(value) => { - match self.target { - Target::I386 => { - self.emit_line(&format!(" mov eax, {}", value)); - } - Target::Amd64 => { - self.emit_line(&format!(" mov rax, {}", value)); - } - Target::Arm64 => { - self.emit_line(&format!(" mov x0, #{}", value)); - } + Expression::IntegerLiteral(value) => match self.target { + Target::I386 => { + self.emit_line(&format!(" mov eax, {}", value)); } - } + Target::Amd64 => { + self.emit_line(&format!(" mov rax, {}", value)); + } + Target::Arm64 => { + self.emit_line(&format!(" mov x0, #{}", value)); + } + }, Expression::StringLiteral(value) => { let label = self.get_string_literal_label(value); match self.target { @@ -442,7 +467,11 @@ impl CodeGenerator { } Expression::Identifier(name) => { // Check if it's a function parameter first - if let Some((_, offset)) = self.current_function_params.iter().find(|(param_name, _)| param_name == name) { + if let Some((_, offset)) = self + .current_function_params + .iter() + .find(|(param_name, _)| param_name == name) + { // Load parameter from stack match self.target { Target::I386 => { @@ -485,7 +514,10 @@ impl CodeGenerator { } } } - Expression::Call { function, arguments } => { + Expression::Call { + function, + arguments, + } => { // Generate arguments and place in calling convention registers/stack match self.target { Target::I386 => { @@ -498,13 +530,17 @@ impl CodeGenerator { Target::Amd64 => { // x86_64: first 6 args in registers, rest on stack let param_registers = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]; - + // Ensure stack alignment before function call // Stack must be 16-byte aligned before 'call' instruction // Since 'call' pushes 8 bytes (return address), we need stack to be 8 bytes off 16-byte boundary - let stack_args = if arguments.len() > param_registers.len() { arguments.len() - param_registers.len() } else { 0 }; + let stack_args = if arguments.len() > param_registers.len() { + arguments.len() - param_registers.len() + } else { + 0 + }; let mut stack_cleanup_size = 0; - + // Handle stack arguments if any if stack_args > 0 { let total_stack_bytes = stack_args * 8; @@ -516,7 +552,7 @@ impl CodeGenerator { stack_cleanup_size += stack_args * 8; } // Note: No additional alignment for register-only calls since function prologue handles it - + // First, save any arguments that go on the stack (in reverse order) if arguments.len() > param_registers.len() { for arg in arguments.iter().skip(param_registers.len()).rev() { @@ -524,14 +560,15 @@ impl CodeGenerator { self.emit_line(" push rax"); } } - + // Then handle register arguments in reverse order to avoid overwriting - let reg_args: Vec<_> = arguments.iter().take(param_registers.len()).collect(); + let reg_args: Vec<_> = + arguments.iter().take(param_registers.len()).collect(); for (i, arg) in reg_args.iter().enumerate().rev() { self.generate_expression(arg)?; self.emit_line(&format!(" mov {}, rax", param_registers[i])); } - + // Store cleanup size for later use self.last_call_stack_cleanup = stack_cleanup_size; } @@ -544,7 +581,7 @@ impl CodeGenerator { self.emit_line(" str x0, [sp, #-16]!"); } } - + // Then handle register arguments in reverse order let reg_args: Vec<_> = arguments.iter().take(8).collect(); for (i, arg) in reg_args.iter().enumerate().rev() { @@ -556,7 +593,7 @@ impl CodeGenerator { } } } - + if let Expression::Identifier(func_name) = function.as_ref() { self.emit_line(&format!(" call {}", func_name)); } else { @@ -564,7 +601,7 @@ impl CodeGenerator { message: "Indirect function calls not implemented".to_string(), }); } - + // Clean up stack for arguments that were pushed match self.target { Target::I386 => { @@ -576,73 +613,87 @@ impl CodeGenerator { Target::Amd64 => { // Clean up stack using stored cleanup size if self.last_call_stack_cleanup > 0 { - self.emit_line(&format!(" add rsp, {}", self.last_call_stack_cleanup)); + self.emit_line(&format!( + " add rsp, {}", + self.last_call_stack_cleanup + )); } } Target::Arm64 => { // Clean up stack arguments (if any) - let stack_args = if arguments.len() > 8 { arguments.len() - 8 } else { 0 }; + let stack_args = if arguments.len() > 8 { + arguments.len() - 8 + } else { + 0 + }; if stack_args > 0 { self.emit_line(&format!(" add sp, sp, #{}", stack_args * 16)); } } } } - Expression::Binary { left, operator, right } => { + Expression::Binary { + left, + operator, + right, + } => { // Generate binary operations // First generate right operand and save it self.generate_expression(right)?; match self.target { Target::I386 => { - self.emit_line(" push eax"); // Save right operand + self.emit_line(" push eax"); // Save right operand } Target::Amd64 => { - self.emit_line(" push rax"); // Save right operand + self.emit_line(" push rax"); // Save right operand } Target::Arm64 => { - self.emit_line(" str x0, [sp, #-16]!"); // Save right operand + self.emit_line(" str x0, [sp, #-16]!"); // Save right operand } } - + // Generate left operand self.generate_expression(left)?; - + // Pop right operand and perform operation match self.target { Target::I386 => { - self.emit_line(" pop ebx"); // Right operand in ebx + self.emit_line(" pop ebx"); // Right operand in ebx match operator { BinaryOperator::Add => self.emit_line(" add eax, ebx"), BinaryOperator::Subtract => self.emit_line(" sub eax, ebx"), BinaryOperator::Multiply => self.emit_line(" imul eax, ebx"), BinaryOperator::Divide => { - self.emit_line(" cdq"); // Sign extend eax to edx:eax + self.emit_line(" cdq"); // Sign extend eax to edx:eax self.emit_line(" idiv ebx"); } BinaryOperator::Modulo => { - self.emit_line(" cdq"); // Sign extend eax to edx:eax + self.emit_line(" cdq"); // Sign extend eax to edx:eax self.emit_line(" idiv ebx"); self.emit_line(" mov eax, edx"); // Remainder is in edx } _ => { return Err(AleccError::CodegenError { - message: format!("Binary operator {:?} not implemented for i386", operator), + message: format!( + "Binary operator {:?} not implemented for i386", + operator + ), }); } } } Target::Amd64 => { - self.emit_line(" pop rbx"); // Right operand in rbx + self.emit_line(" pop rbx"); // Right operand in rbx match operator { BinaryOperator::Add => self.emit_line(" add rax, rbx"), BinaryOperator::Subtract => self.emit_line(" sub rax, rbx"), BinaryOperator::Multiply => self.emit_line(" imul rax, rbx"), BinaryOperator::Divide => { - self.emit_line(" cqo"); // Sign extend rax to rdx:rax + self.emit_line(" cqo"); // Sign extend rax to rdx:rax self.emit_line(" idiv rbx"); } BinaryOperator::Modulo => { - self.emit_line(" cqo"); // Sign extend rax to rdx:rax + self.emit_line(" cqo"); // Sign extend rax to rdx:rax self.emit_line(" idiv rbx"); self.emit_line(" mov rax, rdx"); // Remainder is in rdx } @@ -710,19 +761,22 @@ impl CodeGenerator { } } Target::Arm64 => { - self.emit_line(" ldr x1, [sp], #16"); // Right operand in x1 + self.emit_line(" ldr x1, [sp], #16"); // Right operand in x1 match operator { BinaryOperator::Add => self.emit_line(" add x0, x0, x1"), BinaryOperator::Subtract => self.emit_line(" sub x0, x0, x1"), BinaryOperator::Multiply => self.emit_line(" mul x0, x0, x1"), BinaryOperator::Divide => self.emit_line(" sdiv x0, x0, x1"), BinaryOperator::Modulo => { - self.emit_line(" sdiv x2, x0, x1"); // x2 = x0 / x1 + self.emit_line(" sdiv x2, x0, x1"); // x2 = x0 / x1 self.emit_line(" msub x0, x2, x1, x0"); // x0 = x0 - (x2 * x1) } _ => { return Err(AleccError::CodegenError { - message: format!("Binary operator {:?} not implemented for arm64", operator), + message: format!( + "Binary operator {:?} not implemented for arm64", + operator + ), }); } } @@ -788,12 +842,24 @@ impl CodeGenerator { if let Some(&offset) = self.local_variables.get(name) { match self.target { Target::I386 => { - self.emit_line(&format!(" inc DWORD PTR [ebp + {}]", offset)); - self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset)); + self.emit_line(&format!( + " inc DWORD PTR [ebp + {}]", + offset + )); + self.emit_line(&format!( + " mov eax, DWORD PTR [ebp + {}]", + offset + )); } Target::Amd64 => { - self.emit_line(&format!(" inc QWORD PTR [rbp + {}]", offset)); - self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset)); + self.emit_line(&format!( + " inc QWORD PTR [rbp + {}]", + offset + )); + self.emit_line(&format!( + " mov rax, QWORD PTR [rbp + {}]", + offset + )); } Target::Arm64 => { self.emit_line(&format!(" ldr x0, [x29, #{}]", offset)); @@ -808,7 +874,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Pre-increment can only be applied to variables".to_string(), + message: "Pre-increment can only be applied to variables" + .to_string(), }); } } @@ -818,12 +885,24 @@ impl CodeGenerator { if let Some(&offset) = self.local_variables.get(name) { match self.target { Target::I386 => { - self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset)); - self.emit_line(&format!(" inc DWORD PTR [ebp + {}]", offset)); + self.emit_line(&format!( + " mov eax, DWORD PTR [ebp + {}]", + offset + )); + self.emit_line(&format!( + " inc DWORD PTR [ebp + {}]", + offset + )); } Target::Amd64 => { - self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset)); - self.emit_line(&format!(" inc QWORD PTR [rbp + {}]", offset)); + self.emit_line(&format!( + " mov rax, QWORD PTR [rbp + {}]", + offset + )); + self.emit_line(&format!( + " inc QWORD PTR [rbp + {}]", + offset + )); } Target::Arm64 => { self.emit_line(&format!(" ldr x0, [x29, #{}]", offset)); @@ -839,7 +918,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Post-increment can only be applied to variables".to_string(), + message: "Post-increment can only be applied to variables" + .to_string(), }); } } @@ -849,12 +929,24 @@ impl CodeGenerator { if let Some(&offset) = self.local_variables.get(name) { match self.target { Target::I386 => { - self.emit_line(&format!(" dec DWORD PTR [ebp + {}]", offset)); - self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset)); + self.emit_line(&format!( + " dec DWORD PTR [ebp + {}]", + offset + )); + self.emit_line(&format!( + " mov eax, DWORD PTR [ebp + {}]", + offset + )); } Target::Amd64 => { - self.emit_line(&format!(" dec QWORD PTR [rbp + {}]", offset)); - self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset)); + self.emit_line(&format!( + " dec QWORD PTR [rbp + {}]", + offset + )); + self.emit_line(&format!( + " mov rax, QWORD PTR [rbp + {}]", + offset + )); } Target::Arm64 => { self.emit_line(&format!(" ldr x0, [x29, #{}]", offset)); @@ -869,7 +961,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Pre-decrement can only be applied to variables".to_string(), + message: "Pre-decrement can only be applied to variables" + .to_string(), }); } } @@ -879,12 +972,24 @@ impl CodeGenerator { if let Some(&offset) = self.local_variables.get(name) { match self.target { Target::I386 => { - self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset)); - self.emit_line(&format!(" dec DWORD PTR [ebp + {}]", offset)); + self.emit_line(&format!( + " mov eax, DWORD PTR [ebp + {}]", + offset + )); + self.emit_line(&format!( + " dec DWORD PTR [ebp + {}]", + offset + )); } Target::Amd64 => { - self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset)); - self.emit_line(&format!(" dec QWORD PTR [rbp + {}]", offset)); + self.emit_line(&format!( + " mov rax, QWORD PTR [rbp + {}]", + offset + )); + self.emit_line(&format!( + " dec QWORD PTR [rbp + {}]", + offset + )); } Target::Arm64 => { self.emit_line(&format!(" ldr x0, [x29, #{}]", offset)); @@ -900,7 +1005,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Post-decrement can only be applied to variables".to_string(), + message: "Post-decrement can only be applied to variables" + .to_string(), }); } } @@ -953,13 +1059,13 @@ impl CodeGenerator { if let Some(&base_offset) = self.local_variables.get(array_name) { // Generate the index expression self.generate_expression(index)?; - + // Calculate the array element address: base + index * element_size match self.target { Target::Amd64 => { // Multiply index by 8 (assuming int is 8 bytes for simplicity) self.emit_line(" imul rax, 8"); // Use imul instead of mul - // Add base address + // Add base address self.emit_line(&format!(" lea rbx, [rbp + {}]", base_offset)); self.emit_line(" add rax, rbx"); // Load the value at that address @@ -991,7 +1097,11 @@ impl CodeGenerator { }); } } - Expression::Assignment { target, operator, value } => { + Expression::Assignment { + target, + operator, + value, + } => { // Handle compound assignment operators match operator { crate::parser::AssignmentOperator::Assign => { @@ -1105,7 +1215,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Complex assignment targets not supported for compound operators yet".to_string(), + message: "Complex assignment targets not supported for compound operators yet" + .to_string(), }); } Ok(()) @@ -1144,7 +1255,8 @@ impl CodeGenerator { } } else { return Err(AleccError::CodegenError { - message: "Complex assignment targets not supported for compound operators yet".to_string(), + message: "Complex assignment targets not supported for compound operators yet" + .to_string(), }); } Ok(()) @@ -1152,7 +1264,7 @@ impl CodeGenerator { fn emit_conditional_jump(&mut self, condition: bool, label: &str) -> Result<()> { let instruction = if condition { "jnz" } else { "jz" }; - + match self.target { Target::I386 | Target::Amd64 => { self.emit_line(&format!(" test eax, eax")); @@ -1209,7 +1321,8 @@ impl CodeGenerator { label.clone() } else { let label = format!(".LC{}", self.string_literals.len()); - self.string_literals.insert(content.to_string(), label.clone()); + self.string_literals + .insert(content.to_string(), label.clone()); label } } @@ -1248,7 +1361,11 @@ impl CodeGenerator { } Ok(()) } - Statement::If { condition, then_stmt, else_stmt } => { + Statement::If { + condition, + then_stmt, + else_stmt, + } => { self.collect_string_literals_from_expression(condition)?; self.collect_string_literals_from_statement(then_stmt)?; if let Some(else_statement) = else_stmt { @@ -1261,7 +1378,12 @@ impl CodeGenerator { self.collect_string_literals_from_statement(body)?; Ok(()) } - Statement::For { init, condition, increment, body } => { + Statement::For { + init, + condition, + increment, + body, + } => { if let Some(init_stmt) = init { self.collect_string_literals_from_statement(init_stmt)?; } @@ -1280,7 +1402,7 @@ impl CodeGenerator { } Ok(()) } - _ => Ok(()) // Other statement types don't have expressions we need to collect + _ => Ok(()), // Other statement types don't have expressions we need to collect } } @@ -1299,7 +1421,10 @@ impl CodeGenerator { self.collect_string_literals_from_expression(operand)?; Ok(()) } - Expression::Call { function, arguments } => { + Expression::Call { + function, + arguments, + } => { self.collect_string_literals_from_expression(function)?; for arg in arguments { self.collect_string_literals_from_expression(arg)?; @@ -1311,7 +1436,7 @@ impl CodeGenerator { self.collect_string_literals_from_expression(value)?; Ok(()) } - _ => Ok(()) // Other expression types don't contain string literals + _ => Ok(()), // Other expression types don't contain string literals } } } diff --git a/src/compiler.rs b/src/compiler.rs index 7e81200..fcd32f9 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,15 +1,15 @@ use crate::cli::Args; -use crate::lexer::Lexer; -use crate::parser::Parser; use crate::codegen::CodeGenerator; -use crate::optimizer::{Optimizer, OptimizationLevel}; -use crate::linker::Linker; -use crate::targets::Target; use crate::error::{AleccError, Result}; +use crate::lexer::Lexer; +use crate::linker::Linker; +use crate::optimizer::{OptimizationLevel, Optimizer}; +use crate::parser::Parser; +use crate::targets::Target; use std::path::{Path, PathBuf}; use std::process::Command; use tokio::fs; -use tracing::{info, debug, warn}; +use tracing::{debug, info, warn}; pub struct Compiler { args: Args, @@ -19,11 +19,10 @@ pub struct Compiler { impl Compiler { pub fn new(args: Args) -> Result { - let target = Target::from_string(&args.target).ok_or_else(|| { - AleccError::UnsupportedTarget { + let target = + Target::from_string(&args.target).ok_or_else(|| AleccError::UnsupportedTarget { target: args.target.clone(), - } - })?; + })?; Ok(Self { args, @@ -39,9 +38,11 @@ impl Compiler { }); } - info!("Compiling {} files for target {}", - self.args.input_files.len(), - self.target.as_str()); + info!( + "Compiling {} files for target {}", + self.args.input_files.len(), + self.target.as_str() + ); let mut object_files = Vec::new(); let input_files = self.args.input_files.clone(); // Clone to avoid borrow issues @@ -49,15 +50,19 @@ impl Compiler { // Process each input file for input_file in &input_files { debug!("Processing file: {}", input_file.display()); - - let extension = input_file.extension() + + let extension = input_file + .extension() .and_then(|ext| ext.to_str()) .unwrap_or(""); match extension { "c" | "cpp" | "cxx" | "cc" | "C" => { let obj_file = self.compile_source_file(input_file).await?; - if !self.args.compile_only && !self.args.assembly_only && !self.args.preprocess_only { + if !self.args.compile_only + && !self.args.assembly_only + && !self.args.preprocess_only + { object_files.push(obj_file); } } @@ -71,10 +76,15 @@ impl Compiler { object_files.push(input_file.clone()); } _ => { - warn!("Unknown file extension for {}, treating as C source", - input_file.display()); + warn!( + "Unknown file extension for {}, treating as C source", + input_file.display() + ); let obj_file = self.compile_source_file(input_file).await?; - if !self.args.compile_only && !self.args.assembly_only && !self.args.preprocess_only { + if !self.args.compile_only + && !self.args.assembly_only + && !self.args.preprocess_only + { object_files.push(obj_file); } } @@ -96,19 +106,20 @@ impl Compiler { info!("Compiling source file: {}", input_file.display()); // Read source file - let source = fs::read_to_string(input_file).await.map_err(|_e| { - AleccError::FileNotFound { - path: input_file.to_string_lossy().to_string(), - } - })?; + let source = + fs::read_to_string(input_file) + .await + .map_err(|_e| AleccError::FileNotFound { + path: input_file.to_string_lossy().to_string(), + })?; // Preprocessing let preprocessed = if self.args.preprocess_only { let output_path = self.get_output_path(input_file, "i")?; let preprocessed = self.preprocess(&source, input_file).await?; - fs::write(&output_path, preprocessed).await.map_err(|e| { - AleccError::IoError(e) - })?; + fs::write(&output_path, preprocessed) + .await + .map_err(|e| AleccError::IoError(e))?; return Ok(output_path); } else { self.preprocess(&source, input_file).await? @@ -136,17 +147,17 @@ impl Compiler { if self.args.assembly_only { let output_path = self.get_output_path(input_file, "s")?; - fs::write(&output_path, assembly).await.map_err(|e| { - AleccError::IoError(e) - })?; + fs::write(&output_path, assembly) + .await + .map_err(|e| AleccError::IoError(e))?; return Ok(output_path); } // Write assembly to temporary file let asm_path = self.create_temp_file("s")?; - fs::write(&asm_path, assembly).await.map_err(|e| { - AleccError::IoError(e) - })?; + fs::write(&asm_path, assembly) + .await + .map_err(|e| AleccError::IoError(e))?; // Assemble let obj_path = self.assemble_file(&asm_path).await?; @@ -156,7 +167,7 @@ impl Compiler { async fn preprocess(&self, source: &str, input_file: &Path) -> Result { debug!("Preprocessing {}", input_file.display()); - + // Simple preprocessing - just handle basic #include and #define let mut preprocessed = String::new(); let mut defines = std::collections::HashMap::new(); @@ -175,7 +186,7 @@ impl Compiler { // Process source line by line for line in source.lines() { let trimmed = line.trim(); - + if trimmed.starts_with("#include") { // Handle #include (simplified) match self.extract_include_file(trimmed) { @@ -238,7 +249,7 @@ impl Compiler { } } } - + if let Some(start) = line.find('<') { if let Some(end) = line.rfind('>') { if start != end { @@ -278,7 +289,7 @@ impl Compiler { ], Target::Amd64 => vec![ "/usr/include", - "/usr/local/include", + "/usr/local/include", "/usr/include/x86_64-linux-gnu", ], Target::Arm64 => vec![ @@ -311,12 +322,12 @@ impl Compiler { let assembler = match self.target { Target::I386 => "as", - Target::Amd64 => "as", + Target::Amd64 => "as", Target::Arm64 => "aarch64-linux-gnu-as", }; let mut command = Command::new(assembler); - + match self.target { Target::I386 => { command.args(&["--32"]); @@ -330,8 +341,9 @@ impl Compiler { } command.args(&[ - "-o", &obj_path.to_string_lossy(), - &asm_file.to_string_lossy() + "-o", + &obj_path.to_string_lossy(), + &asm_file.to_string_lossy(), ]); let output = command.output().map_err(|e| AleccError::CodegenError { @@ -352,7 +364,7 @@ impl Compiler { info!("Linking {} object files", object_files.len()); let mut linker = Linker::new(self.target); - + // Set output path let output_path = self.args.output.clone().unwrap_or_else(|| { if self.args.shared { @@ -401,20 +413,26 @@ impl Compiler { if let Some(ref output) = self.args.output { Ok(output.clone()) } else { - let stem = input_file.file_stem() + let stem = input_file + .file_stem() .ok_or_else(|| AleccError::InvalidArgument { message: "Invalid input file name".to_string(), })?; - Ok(PathBuf::from(format!("{}.{}", stem.to_string_lossy(), extension))) + Ok(PathBuf::from(format!( + "{}.{}", + stem.to_string_lossy(), + extension + ))) } } fn create_temp_file(&mut self, extension: &str) -> Result { - let temp_path = std::env::temp_dir() - .join(format!("alecc_{}_{}.{}", - std::process::id(), - self.temp_files.len(), - extension)); + let temp_path = std::env::temp_dir().join(format!( + "alecc_{}_{}.{}", + std::process::id(), + self.temp_files.len(), + extension + )); self.temp_files.push(temp_path.clone()); Ok(temp_path) } @@ -423,8 +441,11 @@ impl Compiler { for temp_file in &self.temp_files { if temp_file.exists() { if let Err(e) = fs::remove_file(temp_file).await { - warn!("Failed to remove temporary file {}: {}", - temp_file.display(), e); + warn!( + "Failed to remove temporary file {}: {}", + temp_file.display(), + e + ); } } } diff --git a/src/lexer.rs b/src/lexer.rs index 971f084..a26bad1 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -12,36 +12,116 @@ pub enum TokenType { Identifier(String), // Keywords - Auto, Break, Case, Char, Const, Continue, Default, Do, - Double, Else, Enum, Extern, Float, For, Goto, If, - Int, Long, Register, Return, Short, Signed, Sizeof, Static, - Struct, Switch, Typedef, Union, Unsigned, Void, Volatile, While, + Auto, + Break, + Case, + Char, + Const, + Continue, + Default, + Do, + Double, + Else, + Enum, + Extern, + Float, + For, + Goto, + If, + Int, + Long, + Register, + Return, + Short, + Signed, + Sizeof, + Static, + Struct, + Switch, + Typedef, + Union, + Unsigned, + Void, + Volatile, + While, // C++ Keywords - Bool, Class, Explicit, Export, False, Friend, Inline, Mutable, - Namespace, New, Operator, Private, Protected, Public, Template, - This, Throw, True, Try, Typename, Using, Virtual, + Bool, + Class, + Explicit, + Export, + False, + Friend, + Inline, + Mutable, + Namespace, + New, + Operator, + Private, + Protected, + Public, + Template, + This, + Throw, + True, + Try, + Typename, + Using, + Virtual, // Operators - Plus, Minus, Multiply, Divide, Modulo, - Assign, PlusAssign, MinusAssign, MultiplyAssign, DivideAssign, ModuloAssign, - Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual, - LogicalAnd, LogicalOr, LogicalNot, - BitwiseAnd, BitwiseOr, BitwiseXor, BitwiseNot, - LeftShift, RightShift, LeftShiftAssign, RightShiftAssign, - BitwiseAndAssign, BitwiseOrAssign, BitwiseXorAssign, - Increment, Decrement, - Arrow, Dot, Question, Colon, + Plus, + Minus, + Multiply, + Divide, + Modulo, + Assign, + PlusAssign, + MinusAssign, + MultiplyAssign, + DivideAssign, + ModuloAssign, + Equal, + NotEqual, + Less, + Greater, + LessEqual, + GreaterEqual, + LogicalAnd, + LogicalOr, + LogicalNot, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + BitwiseNot, + LeftShift, + RightShift, + LeftShiftAssign, + RightShiftAssign, + BitwiseAndAssign, + BitwiseOrAssign, + BitwiseXorAssign, + Increment, + Decrement, + Arrow, + Dot, + Question, + Colon, // Delimiters - LeftParen, RightParen, - LeftBrace, RightBrace, - LeftBracket, RightBracket, - Semicolon, Comma, + LeftParen, + RightParen, + LeftBrace, + RightBrace, + LeftBracket, + RightBracket, + Semicolon, + Comma, Ellipsis, // ... // Preprocessor - Hash, HashHash, + Hash, + HashHash, // Special Eof, @@ -100,10 +180,10 @@ impl Lexer { pub fn tokenize(&mut self) -> crate::error::Result> { let mut tokens = Vec::new(); - + while !self.is_at_end() { self.skip_whitespace(); - + if self.is_at_end() { break; } @@ -128,7 +208,7 @@ impl Lexer { fn scan_token(&mut self) -> crate::error::Result> { let c = self.advance(); - + match c { '+' => { if self.match_char('=') { @@ -365,7 +445,7 @@ impl Lexer { } self.advance(); } - + Err(crate::error::AleccError::LexError { line: self.line, column: self.column, @@ -375,13 +455,13 @@ impl Lexer { fn scan_string(&mut self) -> crate::error::Result> { let mut value = String::new(); - + while !self.is_at_end() && self.current_char() != '"' { if self.current_char() == '\n' { self.line += 1; self.column = 1; } - + if self.current_char() == '\\' { self.advance(); if !self.is_at_end() { @@ -402,7 +482,7 @@ impl Lexer { self.advance(); } } - + if self.is_at_end() { return Err(crate::error::AleccError::LexError { line: self.line, @@ -410,7 +490,7 @@ impl Lexer { message: "Unterminated string literal".to_string(), }); } - + self.advance(); // consume closing '"' Ok(Some(TokenType::StringLiteral(value))) } @@ -423,7 +503,7 @@ impl Lexer { message: "Unterminated character literal".to_string(), }); } - + let c = if self.current_char() == '\\' { self.advance(); if self.is_at_end() { @@ -445,9 +525,9 @@ impl Lexer { } else { self.current_char() }; - + self.advance(); - + if self.is_at_end() || self.current_char() != '\'' { return Err(crate::error::AleccError::LexError { line: self.line, @@ -455,30 +535,30 @@ impl Lexer { message: "Unterminated character literal".to_string(), }); } - + self.advance(); // consume closing '\'' Ok(Some(TokenType::CharLiteral(c))) } fn scan_number(&mut self) -> crate::error::Result> { let start = self.position - 1; - + while !self.is_at_end() && self.current_char().is_ascii_digit() { self.advance(); } - + let mut is_float = false; if !self.is_at_end() && self.current_char() == '.' && self.peek().is_ascii_digit() { is_float = true; self.advance(); // consume '.' - + while !self.is_at_end() && self.current_char().is_ascii_digit() { self.advance(); } } - + let text = &self.input[start..self.position]; - + if is_float { match text.parse::() { Ok(value) => Ok(Some(TokenType::FloatLiteral(value))), @@ -502,7 +582,7 @@ impl Lexer { fn scan_identifier(&mut self) -> crate::error::Result> { let start = self.position - 1; - + while !self.is_at_end() { let c = self.current_char(); if c.is_ascii_alphanumeric() || c == '_' { @@ -511,7 +591,7 @@ impl Lexer { break; } } - + let text = &self.input[start..self.position]; let token_type = match text { "auto" => TokenType::Auto, @@ -571,7 +651,7 @@ impl Lexer { "virtual" => TokenType::Virtual, _ => TokenType::Identifier(text.to_string()), }; - + Ok(Some(token_type)) } } diff --git a/src/lib.rs b/src/lib.rs index 65c148e..498f0be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,9 @@ -pub mod lexer; -pub mod parser; -pub mod codegen; -pub mod targets; -pub mod compiler; pub mod cli; +pub mod codegen; +pub mod compiler; pub mod error; -pub mod optimizer; +pub mod lexer; pub mod linker; +pub mod optimizer; +pub mod parser; +pub mod targets; diff --git a/src/linker.rs b/src/linker.rs index d773f0d..8669b76 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -1,5 +1,5 @@ -use crate::targets::Target; use crate::error::{AleccError, Result}; +use crate::targets::Target; use std::path::{Path, PathBuf}; use std::process::Command; @@ -88,7 +88,7 @@ impl Linker { } let linker_command = self.build_linker_command()?; - + let output = Command::new(&linker_command[0]) .args(&linker_command[1..]) .output() @@ -108,14 +108,14 @@ impl Linker { fn build_linker_command(&self) -> Result> { let mut command = Vec::new(); - + // Choose linker based on target let linker = match self.target { Target::I386 => "ld", Target::Amd64 => "ld", Target::Arm64 => "aarch64-linux-gnu-ld", }; - + command.push(linker.to_string()); // Target-specific flags @@ -238,10 +238,7 @@ impl Linker { "/usr/lib64", "/lib64", ], - Target::Arm64 => vec![ - "/usr/lib/aarch64-linux-gnu", - "/lib/aarch64-linux-gnu", - ], + Target::Arm64 => vec!["/usr/lib/aarch64-linux-gnu", "/lib/aarch64-linux-gnu"], }; for path in lib_paths { @@ -274,7 +271,7 @@ impl Linker { let libgcc_path = String::from_utf8_lossy(&output.stdout); let libgcc_path = libgcc_path.trim(); - + if let Some(parent) = Path::new(libgcc_path).parent() { Ok(parent.to_string_lossy().to_string()) } else { @@ -286,15 +283,15 @@ impl Linker { pub async fn link_shared_library(&self, soname: Option<&str>) -> Result<()> { let mut command = self.build_linker_command()?; - + // Remove executable-specific flags command.retain(|arg| arg != "-pie" && !arg.starts_with("-dynamic-linker")); - + // Add shared library flags if !command.contains(&"-shared".to_string()) { command.push("-shared".to_string()); } - + if let Some(soname) = soname { command.push("-soname".to_string()); command.push(soname.to_string()); @@ -322,7 +319,7 @@ impl Linker { // Use ar to create static library let mut command = vec!["ar".to_string(), "rcs".to_string()]; command.push(self.output_path.to_string_lossy().to_string()); - + for obj in &self.object_files { command.push(obj.to_string_lossy().to_string()); } diff --git a/src/main.rs b/src/main.rs index 6802aec..c162ca2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,19 @@ -use clap::Parser; use anyhow::Result; -use tracing::{info, error}; +use clap::Parser; +use tracing::{error, info}; -mod compiler; -mod lexer; -mod parser; -mod codegen; -mod optimizer; -mod linker; -mod targets; mod cli; +mod codegen; +mod compiler; mod error; +mod lexer; +mod linker; +mod optimizer; +mod parser; +mod targets; -use compiler::Compiler; use cli::Args; +use compiler::Compiler; #[tokio::main] async fn main() -> Result<()> { @@ -21,11 +21,11 @@ async fn main() -> Result<()> { tracing_subscriber::fmt::init(); let args = Args::parse(); - + info!("Starting ALECC compiler v{}", env!("CARGO_PKG_VERSION")); - + let mut compiler = Compiler::new(args.clone())?; - + match compiler.compile().await { Ok(()) => { info!("Compilation completed successfully"); diff --git a/src/optimizer.rs b/src/optimizer.rs index 96f443c..7e9b8d8 100644 --- a/src/optimizer.rs +++ b/src/optimizer.rs @@ -1,5 +1,5 @@ -use crate::parser::Program; use crate::error::Result; +use crate::parser::Program; pub struct Optimizer { level: OptimizationLevel, @@ -40,9 +40,7 @@ impl Optimizer { // No optimization Ok(()) } - OptimizationLevel::Basic => { - self.basic_optimizations(program) - } + OptimizationLevel::Basic => self.basic_optimizations(program), OptimizationLevel::Moderate => { self.basic_optimizations(program)?; self.moderate_optimizations(program) @@ -67,59 +65,59 @@ impl Optimizer { fn basic_optimizations(&mut self, program: &mut Program) -> Result<()> { // Dead code elimination self.eliminate_dead_code(program)?; - + // Constant folding self.fold_constants(program)?; - + // Basic strength reduction self.basic_strength_reduction(program)?; - + Ok(()) } fn moderate_optimizations(&mut self, program: &mut Program) -> Result<()> { // Loop optimizations self.optimize_loops(program)?; - + // Function inlining (basic) self.inline_small_functions(program)?; - + // Common subexpression elimination self.eliminate_common_subexpressions(program)?; - + Ok(()) } fn aggressive_optimizations(&mut self, program: &mut Program) -> Result<()> { // Advanced loop optimizations self.advanced_loop_optimizations(program)?; - + // Aggressive function inlining self.aggressive_inlining(program)?; - + // Inter-procedural optimizations self.interprocedural_optimizations(program)?; - + // Vectorization self.auto_vectorization(program)?; - + Ok(()) } fn size_optimizations(&mut self, program: &mut Program) -> Result<()> { // Prefer smaller code sequences self.optimize_for_size(program)?; - + // Merge identical functions self.merge_identical_functions(program)?; - + Ok(()) } fn aggressive_size_optimizations(&mut self, program: &mut Program) -> Result<()> { // More aggressive size optimizations that might impact performance self.ultra_size_optimizations(program)?; - + Ok(()) } diff --git a/src/parser.rs b/src/parser.rs index 65cc99b..5bfa15b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,5 @@ -use crate::lexer::{Token, TokenType}; use crate::error::{AleccError, Result}; +use crate::lexer::{Token, TokenType}; use std::collections::HashMap; #[derive(Debug, Clone)] @@ -97,30 +97,59 @@ pub enum Expression { #[derive(Debug, Clone)] pub enum BinaryOperator { - Add, Subtract, Multiply, Divide, Modulo, - Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual, - LogicalAnd, LogicalOr, - BitwiseAnd, BitwiseOr, BitwiseXor, - LeftShift, RightShift, + Add, + Subtract, + Multiply, + Divide, + Modulo, + Equal, + NotEqual, + Less, + Greater, + LessEqual, + GreaterEqual, + LogicalAnd, + LogicalOr, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + LeftShift, + RightShift, } #[derive(Debug, Clone)] pub enum UnaryOperator { - Plus, Minus, LogicalNot, BitwiseNot, - PreIncrement, PostIncrement, - PreDecrement, PostDecrement, - AddressOf, Dereference, + Plus, + Minus, + LogicalNot, + BitwiseNot, + PreIncrement, + PostIncrement, + PreDecrement, + PostDecrement, + AddressOf, + Dereference, } #[derive(Debug, Clone)] pub enum AssignmentOperator { - Assign, PlusAssign, MinusAssign, MultiplyAssign, DivideAssign, - #[allow(dead_code)] ModuloAssign, - #[allow(dead_code)] BitwiseAndAssign, - #[allow(dead_code)] BitwiseOrAssign, - #[allow(dead_code)] BitwiseXorAssign, - #[allow(dead_code)] LeftShiftAssign, - #[allow(dead_code)] RightShiftAssign, + Assign, + PlusAssign, + MinusAssign, + MultiplyAssign, + DivideAssign, + #[allow(dead_code)] + ModuloAssign, + #[allow(dead_code)] + BitwiseAndAssign, + #[allow(dead_code)] + BitwiseOrAssign, + #[allow(dead_code)] + BitwiseXorAssign, + #[allow(dead_code)] + LeftShiftAssign, + #[allow(dead_code)] + RightShiftAssign, } #[derive(Debug, Clone)] @@ -235,9 +264,11 @@ impl Parser { } else { let storage_class = self.parse_storage_class(); let base_type = self.parse_type()?; - - if self.check(&TokenType::LeftParen) || - (self.check(&TokenType::Identifier("".to_string())) && self.peek_ahead(1)?.token_type == TokenType::LeftParen) { + + if self.check(&TokenType::LeftParen) + || (self.check(&TokenType::Identifier("".to_string())) + && self.peek_ahead(1)?.token_type == TokenType::LeftParen) + { self.parse_function_declaration(storage_class, base_type) } else { self.parse_variable_declaration(storage_class, base_type) @@ -300,7 +331,7 @@ impl Parser { }; let mut fields = Vec::new(); - + if self.match_token(&TokenType::LeftBrace) { while !self.check(&TokenType::RightBrace) && !self.is_at_end() { let field_type = self.parse_type()?; @@ -313,11 +344,14 @@ impl Parser { message: "Expected field name".to_string(), }); }; - - self.consume(&TokenType::Semicolon, "Expected ';' after field declaration")?; + + self.consume( + &TokenType::Semicolon, + "Expected ';' after field declaration", + )?; fields.push((field_name, field_type)); } - + self.consume(&TokenType::RightBrace, "Expected '}' after struct body")?; } @@ -337,7 +371,7 @@ impl Parser { }; let mut fields = Vec::new(); - + if self.match_token(&TokenType::LeftBrace) { while !self.check(&TokenType::RightBrace) && !self.is_at_end() { let field_type = self.parse_type()?; @@ -350,11 +384,14 @@ impl Parser { message: "Expected field name".to_string(), }); }; - - self.consume(&TokenType::Semicolon, "Expected ';' after field declaration")?; + + self.consume( + &TokenType::Semicolon, + "Expected ';' after field declaration", + )?; fields.push((field_name, field_type)); } - + self.consume(&TokenType::RightBrace, "Expected '}' after union body")?; } @@ -374,10 +411,11 @@ impl Parser { let mut variants = Vec::new(); let mut current_value = 0i64; - + if self.match_token(&TokenType::LeftBrace) { while !self.check(&TokenType::RightBrace) && !self.is_at_end() { - let variant_name = if let TokenType::Identifier(name) = &self.advance()?.token_type { + let variant_name = if let TokenType::Identifier(name) = &self.advance()?.token_type + { name.clone() } else { return Err(AleccError::ParseError { @@ -386,7 +424,7 @@ impl Parser { message: "Expected enum variant name".to_string(), }); }; - + if self.match_token(&TokenType::Assign) { if let TokenType::IntegerLiteral(value) = &self.advance()?.token_type { current_value = *value; @@ -398,15 +436,15 @@ impl Parser { }); } } - + variants.push((variant_name, current_value)); current_value += 1; - + if !self.check(&TokenType::RightBrace) { self.consume(&TokenType::Comma, "Expected ',' between enum variants")?; } } - + self.consume(&TokenType::RightBrace, "Expected '}' after enum body")?; } @@ -415,11 +453,13 @@ impl Parser { // Helper methods fn current_token(&self) -> Result<&Token> { - self.tokens.get(self.current).ok_or_else(|| AleccError::ParseError { - line: 0, - column: 0, - message: "Unexpected end of input".to_string(), - }) + self.tokens + .get(self.current) + .ok_or_else(|| AleccError::ParseError { + line: 0, + column: 0, + message: "Unexpected end of input".to_string(), + }) } fn advance(&mut self) -> Result<&Token> { @@ -445,32 +485,39 @@ impl Parser { } fn previous(&self) -> Result<&Token> { - self.tokens.get(self.current - 1).ok_or_else(|| AleccError::ParseError { - line: 0, - column: 0, - message: "No previous token".to_string(), - }) + self.tokens + .get(self.current - 1) + .ok_or_else(|| AleccError::ParseError { + line: 0, + column: 0, + message: "No previous token".to_string(), + }) } fn peek_ahead(&self, offset: usize) -> Result<&Token> { - self.tokens.get(self.current + offset).ok_or_else(|| AleccError::ParseError { - line: 0, - column: 0, - message: "Unexpected end of input".to_string(), - }) + self.tokens + .get(self.current + offset) + .ok_or_else(|| AleccError::ParseError { + line: 0, + column: 0, + message: "Unexpected end of input".to_string(), + }) } fn is_at_end(&self) -> bool { - self.current >= self.tokens.len() || - matches!(self.tokens.get(self.current).map(|t| &t.token_type), Some(TokenType::Eof)) + self.current >= self.tokens.len() + || matches!( + self.tokens.get(self.current).map(|t| &t.token_type), + Some(TokenType::Eof) + ) } fn check(&self, token_type: &TokenType) -> bool { if self.is_at_end() { false } else { - std::mem::discriminant(&self.current_token().unwrap().token_type) == - std::mem::discriminant(token_type) + std::mem::discriminant(&self.current_token().unwrap().token_type) + == std::mem::discriminant(token_type) } } @@ -521,12 +568,16 @@ impl Parser { message: "Expected typedef name".to_string(), }); }; - + self.consume(&TokenType::Semicolon, "Expected ';' after typedef")?; Ok(Declaration::TypeDef(name, base_type)) } - fn parse_function_declaration(&mut self, _storage: StorageClass, return_type: Type) -> Result { + fn parse_function_declaration( + &mut self, + _storage: StorageClass, + return_type: Type, + ) -> Result { let name = if let TokenType::Identifier(name) = &self.advance()?.token_type { name.clone() } else { @@ -538,16 +589,16 @@ impl Parser { }; self.consume(&TokenType::LeftParen, "Expected '(' after function name")?; - + let mut parameters = Vec::new(); let mut is_variadic = false; - + while !self.check(&TokenType::RightParen) && !self.is_at_end() { if self.match_token(&TokenType::Ellipsis) { is_variadic = true; break; } - + let param_type = self.parse_type()?; let param_name = if let TokenType::Identifier(name) = &self.advance()?.token_type { name.clone() @@ -558,21 +609,24 @@ impl Parser { message: "Expected parameter name".to_string(), }); }; - + parameters.push((param_name, param_type)); - + if !self.check(&TokenType::RightParen) { self.consume(&TokenType::Comma, "Expected ',' between parameters")?; } } - + self.consume(&TokenType::RightParen, "Expected ')' after parameters")?; - + let body = if self.check(&TokenType::LeftBrace) { self.advance()?; // Consume the LeftBrace self.parse_block_statement()? } else { - self.consume(&TokenType::Semicolon, "Expected ';' after function declaration")?; + self.consume( + &TokenType::Semicolon, + "Expected ';' after function declaration", + )?; Statement::Block(Vec::new()) // Forward declaration }; @@ -588,7 +642,11 @@ impl Parser { })) } - fn parse_variable_declaration(&mut self, _storage: StorageClass, var_type: Type) -> Result { + fn parse_variable_declaration( + &mut self, + _storage: StorageClass, + var_type: Type, + ) -> Result { let name = if let TokenType::Identifier(name) = &self.advance()?.token_type { name.clone() } else { @@ -605,8 +663,11 @@ impl Parser { None }; - self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; - + self.consume( + &TokenType::Semicolon, + "Expected ';' after variable declaration", + )?; + Ok(Declaration::Variable(name, var_type, initializer)) } @@ -616,7 +677,7 @@ impl Parser { while !self.check(&TokenType::RightBrace) && !self.is_at_end() { statements.push(self.parse_statement()?); } - + self.consume(&TokenType::RightBrace, "Expected '}'")?; Ok(Statement::Block(statements)) } @@ -676,8 +737,11 @@ impl Parser { None }; - self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?; - + self.consume( + &TokenType::Semicolon, + "Expected ';' after variable declaration", + )?; + Ok(Statement::Declaration { name, var_type, @@ -695,14 +759,14 @@ impl Parser { self.consume(&TokenType::LeftParen, "Expected '(' after 'if'")?; let condition = self.parse_expression()?; self.consume(&TokenType::RightParen, "Expected ')' after if condition")?; - + let then_stmt = Box::new(self.parse_statement()?); let else_stmt = if self.match_token(&TokenType::Else) { Some(Box::new(self.parse_statement()?)) } else { None }; - + Ok(Statement::If { condition, then_stmt, @@ -715,39 +779,39 @@ impl Parser { let condition = self.parse_expression()?; self.consume(&TokenType::RightParen, "Expected ')' after while condition")?; let body = Box::new(self.parse_statement()?); - + Ok(Statement::While { condition, body }) } fn parse_for_statement(&mut self) -> Result { self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?; - + let init = if self.check(&TokenType::Semicolon) { None } else { Some(Box::new(self.parse_statement()?)) }; - + if init.is_none() { self.advance()?; // consume semicolon } - + let condition = if self.check(&TokenType::Semicolon) { None } else { Some(self.parse_expression()?) }; self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?; - + let increment = if self.check(&TokenType::RightParen) { None } else { Some(self.parse_expression()?) }; self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?; - + let body = Box::new(self.parse_statement()?); - + Ok(Statement::For { init, condition, @@ -757,10 +821,18 @@ impl Parser { } fn is_type(&self, token_type: &TokenType) -> bool { - matches!(token_type, - TokenType::Int | TokenType::Float | TokenType::Double | - TokenType::Char | TokenType::Void | TokenType::Short | - TokenType::Long | TokenType::Signed | TokenType::Unsigned) + matches!( + token_type, + TokenType::Int + | TokenType::Float + | TokenType::Double + | TokenType::Char + | TokenType::Void + | TokenType::Short + | TokenType::Long + | TokenType::Signed + | TokenType::Unsigned + ) } fn parse_expression(&mut self) -> Result { @@ -769,7 +841,7 @@ impl Parser { fn parse_assignment(&mut self) -> Result { let expr = self.parse_logical_or()?; - + if self.match_token(&TokenType::Assign) { let value = self.parse_assignment()?; // Right associative return Ok(Expression::Assignment { @@ -806,13 +878,13 @@ impl Parser { value: Box::new(value), }); } - + Ok(expr) } fn parse_logical_or(&mut self) -> Result { let mut expr = self.parse_logical_and()?; - + while self.match_token(&TokenType::LogicalOr) { let operator = BinaryOperator::LogicalOr; let right = self.parse_logical_and()?; @@ -822,13 +894,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_logical_and(&mut self) -> Result { let mut expr = self.parse_bitwise_or()?; - + while self.match_token(&TokenType::LogicalAnd) { let operator = BinaryOperator::LogicalAnd; let right = self.parse_bitwise_or()?; @@ -838,13 +910,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_bitwise_or(&mut self) -> Result { let mut expr = self.parse_bitwise_xor()?; - + while self.match_token(&TokenType::BitwiseOr) { let operator = BinaryOperator::BitwiseOr; let right = self.parse_bitwise_xor()?; @@ -854,13 +926,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_bitwise_xor(&mut self) -> Result { let mut expr = self.parse_bitwise_and()?; - + while self.match_token(&TokenType::BitwiseXor) { let operator = BinaryOperator::BitwiseXor; let right = self.parse_bitwise_and()?; @@ -870,13 +942,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_bitwise_and(&mut self) -> Result { let mut expr = self.parse_equality()?; - + while self.match_token(&TokenType::BitwiseAnd) { let operator = BinaryOperator::BitwiseAnd; let right = self.parse_equality()?; @@ -886,13 +958,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_equality(&mut self) -> Result { let mut expr = self.parse_comparison()?; - + while self.match_tokens(&[TokenType::Equal, TokenType::NotEqual]) { let operator = match self.previous()?.token_type { TokenType::Equal => BinaryOperator::Equal, @@ -906,15 +978,19 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_comparison(&mut self) -> Result { let mut expr = self.parse_shift()?; - - while self.match_tokens(&[TokenType::Greater, TokenType::GreaterEqual, - TokenType::Less, TokenType::LessEqual]) { + + while self.match_tokens(&[ + TokenType::Greater, + TokenType::GreaterEqual, + TokenType::Less, + TokenType::LessEqual, + ]) { let operator = match self.previous()?.token_type { TokenType::Greater => BinaryOperator::Greater, TokenType::GreaterEqual => BinaryOperator::GreaterEqual, @@ -929,13 +1005,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_shift(&mut self) -> Result { let mut expr = self.parse_term()?; - + while self.match_tokens(&[TokenType::LeftShift, TokenType::RightShift]) { let operator = match self.previous()?.token_type { TokenType::LeftShift => BinaryOperator::LeftShift, @@ -949,13 +1025,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_term(&mut self) -> Result { let mut expr = self.parse_factor()?; - + while self.match_tokens(&[TokenType::Minus, TokenType::Plus]) { let operator = match self.previous()?.token_type { TokenType::Minus => BinaryOperator::Subtract, @@ -969,13 +1045,13 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_factor(&mut self) -> Result { let mut expr = self.parse_unary()?; - + while self.match_tokens(&[TokenType::Divide, TokenType::Multiply, TokenType::Modulo]) { let operator = match self.previous()?.token_type { TokenType::Divide => BinaryOperator::Divide, @@ -990,12 +1066,21 @@ impl Parser { right: Box::new(right), }; } - + Ok(expr) } fn parse_unary(&mut self) -> Result { - if self.match_tokens(&[TokenType::LogicalNot, TokenType::Minus, TokenType::Plus, TokenType::Increment, TokenType::Decrement, TokenType::BitwiseAnd, TokenType::Multiply, TokenType::BitwiseNot]) { + if self.match_tokens(&[ + TokenType::LogicalNot, + TokenType::Minus, + TokenType::Plus, + TokenType::Increment, + TokenType::Decrement, + TokenType::BitwiseAnd, + TokenType::Multiply, + TokenType::BitwiseNot, + ]) { let operator = match self.previous()?.token_type { TokenType::LogicalNot => UnaryOperator::LogicalNot, TokenType::Minus => UnaryOperator::Minus, @@ -1013,13 +1098,13 @@ impl Parser { operand: Box::new(right), }); } - + self.parse_call() } fn parse_call(&mut self) -> Result { let mut expr = self.parse_primary()?; - + loop { if self.match_token(&TokenType::LeftParen) { expr = self.finish_call(expr)?; @@ -1045,13 +1130,13 @@ impl Parser { break; } } - + Ok(expr) } fn finish_call(&mut self, callee: Expression) -> Result { let mut arguments = Vec::new(); - + if !self.check(&TokenType::RightParen) { loop { arguments.push(self.parse_expression()?); @@ -1060,9 +1145,9 @@ impl Parser { } } } - + self.consume(&TokenType::RightParen, "Expected ')' after arguments")?; - + Ok(Expression::Call { function: Box::new(callee), arguments, @@ -1075,7 +1160,7 @@ impl Parser { self.consume(&TokenType::RightParen, "Expected ')' after expression")?; return Ok(expr); } - + let token = self.advance()?; match &token.token_type { TokenType::IntegerLiteral(value) => Ok(Expression::IntegerLiteral(*value)), diff --git a/src/targets.rs b/src/targets.rs index f101239..180a63c 100644 --- a/src/targets.rs +++ b/src/targets.rs @@ -19,13 +19,13 @@ impl Target { pub fn native() -> Self { #[cfg(target_arch = "x86")] return Target::I386; - + #[cfg(target_arch = "x86_64")] return Target::Amd64; - + #[cfg(target_arch = "aarch64")] return Target::Arm64; - + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))] return Target::Amd64; // Default fallback } @@ -77,7 +77,7 @@ impl Target { pub fn linker(&self) -> &'static str { match self { Target::I386 => "ld -m elf_i386", - Target::Amd64 => "ld -m elf_x86_64", + Target::Amd64 => "ld -m elf_x86_64", Target::Arm64 => "aarch64-linux-gnu-ld", } } @@ -113,9 +113,9 @@ impl Target { #[allow(dead_code)] #[derive(Debug, Clone, Copy)] pub enum CallingConvention { - Cdecl, // x86-32 - SystemV, // x86-64 - Aapcs64, // ARM64 + Cdecl, // x86-32 + SystemV, // x86-64 + Aapcs64, // ARM64 } #[allow(dead_code)] @@ -131,8 +131,15 @@ impl RegisterSet { pub fn general_purpose_registers(&self) -> &'static [&'static str] { match self { RegisterSet::X86_32 => &["eax", "ebx", "ecx", "edx", "esi", "edi"], - RegisterSet::X86_64 => &["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"], - RegisterSet::Aarch64 => &["x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"], + RegisterSet::X86_64 => &[ + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", + "r14", "r15", + ], + RegisterSet::Aarch64 => &[ + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", + "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", + "x25", "x26", "x27", "x28", + ], } } diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 183ea39..97f1061 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,11 +1,11 @@ #[cfg(test)] mod tests { + use alecc::cli::Args; + use alecc::codegen::CodeGenerator; + use alecc::compiler::Compiler; use alecc::lexer::{Lexer, TokenType}; use alecc::parser::Parser; - use alecc::codegen::CodeGenerator; use alecc::targets::Target; - use alecc::compiler::Compiler; - use alecc::cli::Args; use std::path::PathBuf; #[test] @@ -13,7 +13,7 @@ mod tests { let input = "int main() { return 0; }".to_string(); let mut lexer = Lexer::new(input); let tokens = lexer.tokenize().unwrap(); - + assert!(!tokens.is_empty()); assert!(matches!(tokens[0].token_type, TokenType::Int)); } @@ -23,8 +23,11 @@ mod tests { let input = "42 3.14 'a' \"hello\"".to_string(); let mut lexer = Lexer::new(input); let tokens = lexer.tokenize().unwrap(); - - assert!(matches!(tokens[0].token_type, TokenType::IntegerLiteral(42))); + + assert!(matches!( + tokens[0].token_type, + TokenType::IntegerLiteral(42) + )); assert!(matches!(tokens[1].token_type, TokenType::FloatLiteral(_))); assert!(matches!(tokens[2].token_type, TokenType::CharLiteral('a'))); assert!(matches!(tokens[3].token_type, TokenType::StringLiteral(_))); @@ -35,7 +38,7 @@ mod tests { let input = "+ - * / == != < > <= >=".to_string(); let mut lexer = Lexer::new(input); let tokens = lexer.tokenize().unwrap(); - + assert!(matches!(tokens[0].token_type, TokenType::Plus)); assert!(matches!(tokens[1].token_type, TokenType::Minus)); assert!(matches!(tokens[2].token_type, TokenType::Multiply)); @@ -49,9 +52,10 @@ mod tests { let input = "int x; // comment\n/* block comment */ int y;".to_string(); let mut lexer = Lexer::new(input); let tokens = lexer.tokenize().unwrap(); - + // Comments should be filtered out - let identifier_count = tokens.iter() + let identifier_count = tokens + .iter() .filter(|t| matches!(t.token_type, TokenType::Identifier(_))) .count(); assert_eq!(identifier_count, 2); // x and y @@ -64,7 +68,7 @@ mod tests { let tokens = lexer.tokenize().unwrap(); let mut parser = Parser::new(tokens); let program = parser.parse().unwrap(); - + assert_eq!(program.functions.len(), 1); assert_eq!(program.functions[0].name, "main"); } @@ -92,10 +96,10 @@ mod tests { let tokens = lexer.tokenize().unwrap(); let mut parser = Parser::new(tokens); let program = parser.parse().unwrap(); - + let mut codegen = CodeGenerator::new(Target::Amd64); let assembly = codegen.generate(&program).unwrap(); - + assert!(assembly.contains("main:")); assert!(assembly.contains("ret")); } @@ -136,13 +140,13 @@ mod tests { #[test] fn test_error_types() { use alecc::error::AleccError; - + let lex_error = AleccError::LexError { line: 1, column: 5, message: "Unexpected character".to_string(), }; - + assert!(format!("{}", lex_error).contains("line 1")); assert!(format!("{}", lex_error).contains("column 5")); }