Signed-off-by: ale <ale@manalejandro.com>
Este commit está contenido en:
ale
2025-08-23 13:13:15 +02:00
padre ce4ff63d0d
commit 0bee0c799b
Se han modificado 11 ficheros con 736 adiciones y 413 borrados

Ver fichero

@@ -1,9 +1,9 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use alecc::lexer::Lexer;
use alecc::parser::Parser;
use alecc::codegen::CodeGenerator;
use alecc::optimizer::{Optimizer, OptimizationLevel};
use alecc::lexer::Lexer;
use alecc::optimizer::{OptimizationLevel, Optimizer};
use alecc::parser::Parser;
use alecc::targets::Target;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
const SIMPLE_C_CODE: &str = r#"
int main() {
@@ -119,5 +119,11 @@ fn bench_optimizer(c: &mut Criterion) {
});
}
// Register the four benchmark functions under a single group named `benches`.
// The group must be declared exactly once: a second `criterion_group!` with
// the same name would define the same item twice.
criterion_group!(
    benches,
    bench_lexer,
    bench_parser,
    bench_codegen,
    bench_optimizer
);
// Hand the `benches` group to Criterion's generated entry point.
criterion_main!(benches);

Ver fichero

@@ -1,6 +1,8 @@
use crate::parser::{Program, Function, Expression, Statement, Type, BinaryOperator, UnaryOperator};
use crate::targets::Target;
use crate::error::{AleccError, Result};
use crate::parser::{
BinaryOperator, Expression, Function, Program, Statement, Type, UnaryOperator,
};
use crate::targets::Target;
use std::collections::HashMap;
pub struct CodeGenerator {
@@ -11,8 +13,8 @@ pub struct CodeGenerator {
current_function_params: Vec<(String, i32)>, // (name, stack_offset)
epilogue_emitted: bool,
local_variables: HashMap<String, i32>, // (name, stack_offset)
stack_offset: i32, // Current stack offset for local variables
last_call_stack_cleanup: usize, // Stack bytes to clean up after last call
stack_offset: i32, // Current stack offset for local variables
last_call_stack_cleanup: usize, // Stack bytes to clean up after last call
}
impl CodeGenerator {
@@ -35,9 +37,9 @@ impl CodeGenerator {
for function in &program.functions {
self.collect_string_literals_from_statement(&function.body)?;
}
self.emit_header();
// Generate string literals section
if !self.string_literals.is_empty() {
self.emit_line(".section .rodata");
@@ -74,23 +76,23 @@ impl CodeGenerator {
self.emit_line("");
self.emit_line(".globl _start");
self.emit_line("_start:");
// Set up stack and call main
self.emit_line(" push rbp");
self.emit_line(" mov rbp, rsp");
// Reserve space for temporary operations (ensures proper stack alignment)
// 120 bytes = 15*8, so after rbp push (8 bytes), total is 128 bytes = multiple of 16
self.emit_line(" sub rsp, 120");
// Call main function
self.emit_line(" call main");
// Exit syscall with main's return value
self.emit_line(" mov rdi, rax"); // exit status = main's return value
self.emit_line(" mov rax, 60"); // sys_exit syscall number
self.emit_line(" syscall"); // invoke syscall
self.emit_line(" mov rdi, rax"); // exit status = main's return value
self.emit_line(" mov rax, 60"); // sys_exit syscall number
self.emit_line(" syscall"); // invoke syscall
Ok(())
}
@@ -122,27 +124,27 @@ impl CodeGenerator {
// This is a function definition, generate the actual function
}
}
self.emit_line(&format!(".globl {}", function.name));
self.emit_line(&format!("{}:", function.name));
// Set up parameter tracking
self.current_function_params.clear();
self.local_variables.clear();
// Start local variables after parameters to avoid collision
self.stack_offset = -(function.parameters.len() as i32 * 8);
self.epilogue_emitted = false;
// Function prologue
self.emit_function_prologue(&function.parameters)?;
// Function body
self.generate_statement(&function.body)?;
// Function epilogue (always ensure we have a proper function ending)
// This handles cases where there might not be explicit returns in all paths
self.emit_function_epilogue()?;
self.emit_line("");
Ok(())
}
@@ -152,59 +154,64 @@ impl CodeGenerator {
Target::I386 => {
self.emit_line(" push ebp");
self.emit_line(" mov ebp, esp");
// Reserve space for parameters only (no extra temporaries for now)
let stack_space = parameters.len() * 4;
if stack_space > 0 {
self.emit_line(&format!(" sub esp, {}", stack_space));
}
// Store parameters from stack (i386 calling convention)
for (i, (name, _)) in parameters.iter().enumerate() {
let param_offset = -(i as i32 + 1) * 4;
let stack_offset = 8 + i as i32 * 4; // ebp + 8 + offset
self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", stack_offset));
self.emit_line(&format!(" mov DWORD PTR [ebp + {}], eax", param_offset));
self.current_function_params.push((name.clone(), param_offset));
self.current_function_params
.push((name.clone(), param_offset));
}
}
Target::Amd64 => {
self.emit_line(" push rbp");
self.emit_line(" mov rbp, rsp");
// Reserve space for parameters + ensure 16-byte alignment
let stack_space = parameters.len() * 8;
// Always reserve at least 8 bytes to maintain 16-byte alignment after rbp push
let min_space = if stack_space == 0 { 8 } else { stack_space };
let aligned_space = ((min_space + 15) / 16) * 16; // Round up to 16-byte boundary
self.emit_line(&format!(" sub rsp, {}", aligned_space));
// Store parameters from registers (x86_64 calling convention)
let param_registers = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];
for (i, (name, _)) in parameters.iter().enumerate() {
let param_offset = -(i as i32 + 1) * 8;
if i < param_registers.len() {
// Parameter passed in register
self.emit_line(&format!(" mov QWORD PTR [rbp + {}], {}", param_offset, param_registers[i]));
self.emit_line(&format!(
" mov QWORD PTR [rbp + {}], {}",
param_offset, param_registers[i]
));
} else {
// Parameter passed on stack
let stack_offset = 16 + (i - param_registers.len()) as i32 * 8;
self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", stack_offset));
self.emit_line(&format!(" mov QWORD PTR [rbp + {}], rax", param_offset));
}
self.current_function_params.push((name.clone(), param_offset));
self.current_function_params
.push((name.clone(), param_offset));
}
}
Target::Arm64 => {
self.emit_line(" stp x29, x30, [sp, #-16]!");
self.emit_line(" mov x29, sp");
let stack_space = parameters.len() * 8;
if stack_space > 0 {
let aligned_space = (stack_space + 15) & !15; // 16-byte aligned
self.emit_line(&format!(" sub sp, sp, #{}", aligned_space));
}
// Store parameters from registers (ARM64 calling convention)
for (i, (name, _)) in parameters.iter().enumerate() {
let param_offset = -(i as i32 + 1) * 8;
@@ -217,7 +224,8 @@ impl CodeGenerator {
self.emit_line(&format!(" ldr x9, [x29, #{}]", stack_offset));
self.emit_line(&format!(" str x9, [x29, #{}]", param_offset));
}
self.current_function_params.push((name.clone(), param_offset));
self.current_function_params
.push((name.clone(), param_offset));
}
}
}
@@ -228,7 +236,7 @@ impl CodeGenerator {
if self.epilogue_emitted {
return Ok(()); // Don't emit duplicate epilogues
}
match self.target {
Target::I386 => {
self.emit_line(" mov esp, ebp");
@@ -246,7 +254,7 @@ impl CodeGenerator {
self.emit_line(" ret");
}
}
self.epilogue_emitted = true;
Ok(())
}
@@ -270,7 +278,7 @@ impl CodeGenerator {
self.emit_line(" ret");
}
}
self.epilogue_emitted = true;
Ok(())
}
@@ -280,30 +288,40 @@ impl CodeGenerator {
Statement::Expression(expr) => {
self.generate_expression(expr)?;
}
Statement::Declaration { name, var_type, initializer } => {
Statement::Declaration {
name,
var_type,
initializer,
} => {
// Calculate space needed based on type
let size = match var_type {
Type::Array(_, Some(length)) => length * 8, // Assuming 8-byte elements
Type::Array(_, None) => 80, // Default size for unsized arrays
_ => 8, // Default 8 bytes for simple types
Type::Array(_, None) => 80, // Default size for unsized arrays
_ => 8, // Default 8 bytes for simple types
};
// Allocate space for variable/array
self.stack_offset -= size as i32;
let var_offset = self.stack_offset;
// Store variable name and offset for later reference
self.local_variables.insert(name.clone(), var_offset);
if let Some(init_expr) = initializer {
self.generate_expression(init_expr)?;
// Store the value in the local variable slot
match self.target {
Target::Amd64 => {
self.emit_line(&format!(" mov QWORD PTR [rbp + {}], rax", var_offset));
self.emit_line(&format!(
" mov QWORD PTR [rbp + {}], rax",
var_offset
));
}
Target::I386 => {
self.emit_line(&format!(" mov DWORD PTR [ebp + {}], eax", var_offset));
self.emit_line(&format!(
" mov DWORD PTR [ebp + {}], eax",
var_offset
));
}
Target::Arm64 => {
self.emit_line(&format!(" str x0, [x29, #{}]", var_offset));
@@ -335,16 +353,20 @@ impl CodeGenerator {
self.generate_statement(stmt)?;
}
}
Statement::If { condition, then_stmt, else_stmt } => {
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
let else_label = self.new_label("else");
let end_label = self.new_label("endif");
self.generate_expression(condition)?;
self.emit_conditional_jump(false, &else_label)?;
self.generate_statement(then_stmt)?;
self.emit_jump(&end_label)?;
self.emit_line(&format!("{}:", else_label));
if let Some(else_stmt) = else_stmt {
// Reset epilogue flag for else branch in case it contains a return
@@ -356,47 +378,52 @@ impl CodeGenerator {
self.epilogue_emitted = saved_epilogue_state;
}
}
self.emit_line(&format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_label = self.new_label("loop");
let end_label = self.new_label("endloop");
self.emit_line(&format!("{}:", loop_label));
self.generate_expression(condition)?;
self.emit_conditional_jump(false, &end_label)?;
self.generate_statement(body)?;
self.emit_jump(&loop_label)?;
self.emit_line(&format!("{}:", end_label));
}
Statement::For { init, condition, increment, body } => {
Statement::For {
init,
condition,
increment,
body,
} => {
// Generate initialization
if let Some(init_stmt) = init {
self.generate_statement(init_stmt)?;
}
let loop_label = self.new_label("forloop");
let end_label = self.new_label("endfor");
self.emit_line(&format!("{}:", loop_label));
// Generate condition check
if let Some(cond_expr) = condition {
self.generate_expression(cond_expr)?;
self.emit_conditional_jump(false, &end_label)?;
}
// Generate body
self.generate_statement(body)?;
// Generate increment
if let Some(inc_expr) = increment {
self.generate_expression(inc_expr)?;
}
self.emit_jump(&loop_label)?;
self.emit_line(&format!("{}:", end_label));
}
@@ -412,19 +439,17 @@ impl CodeGenerator {
fn generate_expression(&mut self, expression: &Expression) -> Result<()> {
match expression {
Expression::IntegerLiteral(value) => {
match self.target {
Target::I386 => {
self.emit_line(&format!(" mov eax, {}", value));
}
Target::Amd64 => {
self.emit_line(&format!(" mov rax, {}", value));
}
Target::Arm64 => {
self.emit_line(&format!(" mov x0, #{}", value));
}
Expression::IntegerLiteral(value) => match self.target {
Target::I386 => {
self.emit_line(&format!(" mov eax, {}", value));
}
}
Target::Amd64 => {
self.emit_line(&format!(" mov rax, {}", value));
}
Target::Arm64 => {
self.emit_line(&format!(" mov x0, #{}", value));
}
},
Expression::StringLiteral(value) => {
let label = self.get_string_literal_label(value);
match self.target {
@@ -442,7 +467,11 @@ impl CodeGenerator {
}
Expression::Identifier(name) => {
// Check if it's a function parameter first
if let Some((_, offset)) = self.current_function_params.iter().find(|(param_name, _)| param_name == name) {
if let Some((_, offset)) = self
.current_function_params
.iter()
.find(|(param_name, _)| param_name == name)
{
// Load parameter from stack
match self.target {
Target::I386 => {
@@ -485,7 +514,10 @@ impl CodeGenerator {
}
}
}
Expression::Call { function, arguments } => {
Expression::Call {
function,
arguments,
} => {
// Generate arguments and place in calling convention registers/stack
match self.target {
Target::I386 => {
@@ -498,13 +530,17 @@ impl CodeGenerator {
Target::Amd64 => {
// x86_64: first 6 args in registers, rest on stack
let param_registers = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];
// Ensure stack alignment before function call
// Stack must be 16-byte aligned before 'call' instruction
// Since 'call' pushes 8 bytes (return address), we need stack to be 8 bytes off 16-byte boundary
let stack_args = if arguments.len() > param_registers.len() { arguments.len() - param_registers.len() } else { 0 };
let stack_args = if arguments.len() > param_registers.len() {
arguments.len() - param_registers.len()
} else {
0
};
let mut stack_cleanup_size = 0;
// Handle stack arguments if any
if stack_args > 0 {
let total_stack_bytes = stack_args * 8;
@@ -516,7 +552,7 @@ impl CodeGenerator {
stack_cleanup_size += stack_args * 8;
}
// Note: No additional alignment for register-only calls since function prologue handles it
// First, save any arguments that go on the stack (in reverse order)
if arguments.len() > param_registers.len() {
for arg in arguments.iter().skip(param_registers.len()).rev() {
@@ -524,14 +560,15 @@ impl CodeGenerator {
self.emit_line(" push rax");
}
}
// Then handle register arguments in reverse order to avoid overwriting
let reg_args: Vec<_> = arguments.iter().take(param_registers.len()).collect();
let reg_args: Vec<_> =
arguments.iter().take(param_registers.len()).collect();
for (i, arg) in reg_args.iter().enumerate().rev() {
self.generate_expression(arg)?;
self.emit_line(&format!(" mov {}, rax", param_registers[i]));
}
// Store cleanup size for later use
self.last_call_stack_cleanup = stack_cleanup_size;
}
@@ -544,7 +581,7 @@ impl CodeGenerator {
self.emit_line(" str x0, [sp, #-16]!");
}
}
// Then handle register arguments in reverse order
let reg_args: Vec<_> = arguments.iter().take(8).collect();
for (i, arg) in reg_args.iter().enumerate().rev() {
@@ -556,7 +593,7 @@ impl CodeGenerator {
}
}
}
if let Expression::Identifier(func_name) = function.as_ref() {
self.emit_line(&format!(" call {}", func_name));
} else {
@@ -564,7 +601,7 @@ impl CodeGenerator {
message: "Indirect function calls not implemented".to_string(),
});
}
// Clean up stack for arguments that were pushed
match self.target {
Target::I386 => {
@@ -576,73 +613,87 @@ impl CodeGenerator {
Target::Amd64 => {
// Clean up stack using stored cleanup size
if self.last_call_stack_cleanup > 0 {
self.emit_line(&format!(" add rsp, {}", self.last_call_stack_cleanup));
self.emit_line(&format!(
" add rsp, {}",
self.last_call_stack_cleanup
));
}
}
Target::Arm64 => {
// Clean up stack arguments (if any)
let stack_args = if arguments.len() > 8 { arguments.len() - 8 } else { 0 };
let stack_args = if arguments.len() > 8 {
arguments.len() - 8
} else {
0
};
if stack_args > 0 {
self.emit_line(&format!(" add sp, sp, #{}", stack_args * 16));
}
}
}
}
Expression::Binary { left, operator, right } => {
Expression::Binary {
left,
operator,
right,
} => {
// Generate binary operations
// First generate right operand and save it
self.generate_expression(right)?;
match self.target {
Target::I386 => {
self.emit_line(" push eax"); // Save right operand
self.emit_line(" push eax"); // Save right operand
}
Target::Amd64 => {
self.emit_line(" push rax"); // Save right operand
self.emit_line(" push rax"); // Save right operand
}
Target::Arm64 => {
self.emit_line(" str x0, [sp, #-16]!"); // Save right operand
self.emit_line(" str x0, [sp, #-16]!"); // Save right operand
}
}
// Generate left operand
self.generate_expression(left)?;
// Pop right operand and perform operation
match self.target {
Target::I386 => {
self.emit_line(" pop ebx"); // Right operand in ebx
self.emit_line(" pop ebx"); // Right operand in ebx
match operator {
BinaryOperator::Add => self.emit_line(" add eax, ebx"),
BinaryOperator::Subtract => self.emit_line(" sub eax, ebx"),
BinaryOperator::Multiply => self.emit_line(" imul eax, ebx"),
BinaryOperator::Divide => {
self.emit_line(" cdq"); // Sign extend eax to edx:eax
self.emit_line(" cdq"); // Sign extend eax to edx:eax
self.emit_line(" idiv ebx");
}
BinaryOperator::Modulo => {
self.emit_line(" cdq"); // Sign extend eax to edx:eax
self.emit_line(" cdq"); // Sign extend eax to edx:eax
self.emit_line(" idiv ebx");
self.emit_line(" mov eax, edx"); // Remainder is in edx
}
_ => {
return Err(AleccError::CodegenError {
message: format!("Binary operator {:?} not implemented for i386", operator),
message: format!(
"Binary operator {:?} not implemented for i386",
operator
),
});
}
}
}
Target::Amd64 => {
self.emit_line(" pop rbx"); // Right operand in rbx
self.emit_line(" pop rbx"); // Right operand in rbx
match operator {
BinaryOperator::Add => self.emit_line(" add rax, rbx"),
BinaryOperator::Subtract => self.emit_line(" sub rax, rbx"),
BinaryOperator::Multiply => self.emit_line(" imul rax, rbx"),
BinaryOperator::Divide => {
self.emit_line(" cqo"); // Sign extend rax to rdx:rax
self.emit_line(" cqo"); // Sign extend rax to rdx:rax
self.emit_line(" idiv rbx");
}
BinaryOperator::Modulo => {
self.emit_line(" cqo"); // Sign extend rax to rdx:rax
self.emit_line(" cqo"); // Sign extend rax to rdx:rax
self.emit_line(" idiv rbx");
self.emit_line(" mov rax, rdx"); // Remainder is in rdx
}
@@ -710,19 +761,22 @@ impl CodeGenerator {
}
}
Target::Arm64 => {
self.emit_line(" ldr x1, [sp], #16"); // Right operand in x1
self.emit_line(" ldr x1, [sp], #16"); // Right operand in x1
match operator {
BinaryOperator::Add => self.emit_line(" add x0, x0, x1"),
BinaryOperator::Subtract => self.emit_line(" sub x0, x0, x1"),
BinaryOperator::Multiply => self.emit_line(" mul x0, x0, x1"),
BinaryOperator::Divide => self.emit_line(" sdiv x0, x0, x1"),
BinaryOperator::Modulo => {
self.emit_line(" sdiv x2, x0, x1"); // x2 = x0 / x1
self.emit_line(" sdiv x2, x0, x1"); // x2 = x0 / x1
self.emit_line(" msub x0, x2, x1, x0"); // x0 = x0 - (x2 * x1)
}
_ => {
return Err(AleccError::CodegenError {
message: format!("Binary operator {:?} not implemented for arm64", operator),
message: format!(
"Binary operator {:?} not implemented for arm64",
operator
),
});
}
}
@@ -788,12 +842,24 @@ impl CodeGenerator {
if let Some(&offset) = self.local_variables.get(name) {
match self.target {
Target::I386 => {
self.emit_line(&format!(" inc DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(
" inc DWORD PTR [ebp + {}]",
offset
));
self.emit_line(&format!(
" mov eax, DWORD PTR [ebp + {}]",
offset
));
}
Target::Amd64 => {
self.emit_line(&format!(" inc QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(
" inc QWORD PTR [rbp + {}]",
offset
));
self.emit_line(&format!(
" mov rax, QWORD PTR [rbp + {}]",
offset
));
}
Target::Arm64 => {
self.emit_line(&format!(" ldr x0, [x29, #{}]", offset));
@@ -808,7 +874,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Pre-increment can only be applied to variables".to_string(),
message: "Pre-increment can only be applied to variables"
.to_string(),
});
}
}
@@ -818,12 +885,24 @@ impl CodeGenerator {
if let Some(&offset) = self.local_variables.get(name) {
match self.target {
Target::I386 => {
self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(" inc DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(
" mov eax, DWORD PTR [ebp + {}]",
offset
));
self.emit_line(&format!(
" inc DWORD PTR [ebp + {}]",
offset
));
}
Target::Amd64 => {
self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(" inc QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(
" mov rax, QWORD PTR [rbp + {}]",
offset
));
self.emit_line(&format!(
" inc QWORD PTR [rbp + {}]",
offset
));
}
Target::Arm64 => {
self.emit_line(&format!(" ldr x0, [x29, #{}]", offset));
@@ -839,7 +918,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Post-increment can only be applied to variables".to_string(),
message: "Post-increment can only be applied to variables"
.to_string(),
});
}
}
@@ -849,12 +929,24 @@ impl CodeGenerator {
if let Some(&offset) = self.local_variables.get(name) {
match self.target {
Target::I386 => {
self.emit_line(&format!(" dec DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(
" dec DWORD PTR [ebp + {}]",
offset
));
self.emit_line(&format!(
" mov eax, DWORD PTR [ebp + {}]",
offset
));
}
Target::Amd64 => {
self.emit_line(&format!(" dec QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(
" dec QWORD PTR [rbp + {}]",
offset
));
self.emit_line(&format!(
" mov rax, QWORD PTR [rbp + {}]",
offset
));
}
Target::Arm64 => {
self.emit_line(&format!(" ldr x0, [x29, #{}]", offset));
@@ -869,7 +961,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Pre-decrement can only be applied to variables".to_string(),
message: "Pre-decrement can only be applied to variables"
.to_string(),
});
}
}
@@ -879,12 +972,24 @@ impl CodeGenerator {
if let Some(&offset) = self.local_variables.get(name) {
match self.target {
Target::I386 => {
self.emit_line(&format!(" mov eax, DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(" dec DWORD PTR [ebp + {}]", offset));
self.emit_line(&format!(
" mov eax, DWORD PTR [ebp + {}]",
offset
));
self.emit_line(&format!(
" dec DWORD PTR [ebp + {}]",
offset
));
}
Target::Amd64 => {
self.emit_line(&format!(" mov rax, QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(" dec QWORD PTR [rbp + {}]", offset));
self.emit_line(&format!(
" mov rax, QWORD PTR [rbp + {}]",
offset
));
self.emit_line(&format!(
" dec QWORD PTR [rbp + {}]",
offset
));
}
Target::Arm64 => {
self.emit_line(&format!(" ldr x0, [x29, #{}]", offset));
@@ -900,7 +1005,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Post-decrement can only be applied to variables".to_string(),
message: "Post-decrement can only be applied to variables"
.to_string(),
});
}
}
@@ -953,13 +1059,13 @@ impl CodeGenerator {
if let Some(&base_offset) = self.local_variables.get(array_name) {
// Generate the index expression
self.generate_expression(index)?;
// Calculate the array element address: base + index * element_size
match self.target {
Target::Amd64 => {
// Multiply index by 8 (assuming int is 8 bytes for simplicity)
self.emit_line(" imul rax, 8"); // Use imul instead of mul
// Add base address
// Add base address
self.emit_line(&format!(" lea rbx, [rbp + {}]", base_offset));
self.emit_line(" add rax, rbx");
// Load the value at that address
@@ -991,7 +1097,11 @@ impl CodeGenerator {
});
}
}
Expression::Assignment { target, operator, value } => {
Expression::Assignment {
target,
operator,
value,
} => {
// Handle compound assignment operators
match operator {
crate::parser::AssignmentOperator::Assign => {
@@ -1105,7 +1215,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Complex assignment targets not supported for compound operators yet".to_string(),
message: "Complex assignment targets not supported for compound operators yet"
.to_string(),
});
}
Ok(())
@@ -1144,7 +1255,8 @@ impl CodeGenerator {
}
} else {
return Err(AleccError::CodegenError {
message: "Complex assignment targets not supported for compound operators yet".to_string(),
message: "Complex assignment targets not supported for compound operators yet"
.to_string(),
});
}
Ok(())
@@ -1152,7 +1264,7 @@ impl CodeGenerator {
fn emit_conditional_jump(&mut self, condition: bool, label: &str) -> Result<()> {
let instruction = if condition { "jnz" } else { "jz" };
match self.target {
Target::I386 | Target::Amd64 => {
self.emit_line(&format!(" test eax, eax"));
@@ -1209,7 +1321,8 @@ impl CodeGenerator {
label.clone()
} else {
let label = format!(".LC{}", self.string_literals.len());
self.string_literals.insert(content.to_string(), label.clone());
self.string_literals
.insert(content.to_string(), label.clone());
label
}
}
@@ -1248,7 +1361,11 @@ impl CodeGenerator {
}
Ok(())
}
Statement::If { condition, then_stmt, else_stmt } => {
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
self.collect_string_literals_from_expression(condition)?;
self.collect_string_literals_from_statement(then_stmt)?;
if let Some(else_statement) = else_stmt {
@@ -1261,7 +1378,12 @@ impl CodeGenerator {
self.collect_string_literals_from_statement(body)?;
Ok(())
}
Statement::For { init, condition, increment, body } => {
Statement::For {
init,
condition,
increment,
body,
} => {
if let Some(init_stmt) = init {
self.collect_string_literals_from_statement(init_stmt)?;
}
@@ -1280,7 +1402,7 @@ impl CodeGenerator {
}
Ok(())
}
_ => Ok(()) // Other statement types don't have expressions we need to collect
_ => Ok(()), // Other statement types don't have expressions we need to collect
}
}
@@ -1299,7 +1421,10 @@ impl CodeGenerator {
self.collect_string_literals_from_expression(operand)?;
Ok(())
}
Expression::Call { function, arguments } => {
Expression::Call {
function,
arguments,
} => {
self.collect_string_literals_from_expression(function)?;
for arg in arguments {
self.collect_string_literals_from_expression(arg)?;
@@ -1311,7 +1436,7 @@ impl CodeGenerator {
self.collect_string_literals_from_expression(value)?;
Ok(())
}
_ => Ok(()) // Other expression types don't contain string literals
_ => Ok(()), // Other expression types don't contain string literals
}
}
}

Ver fichero

@@ -1,15 +1,15 @@
use crate::cli::Args;
use crate::lexer::Lexer;
use crate::parser::Parser;
use crate::codegen::CodeGenerator;
use crate::optimizer::{Optimizer, OptimizationLevel};
use crate::linker::Linker;
use crate::targets::Target;
use crate::error::{AleccError, Result};
use crate::lexer::Lexer;
use crate::linker::Linker;
use crate::optimizer::{OptimizationLevel, Optimizer};
use crate::parser::Parser;
use crate::targets::Target;
use std::path::{Path, PathBuf};
use std::process::Command;
use tokio::fs;
use tracing::{info, debug, warn};
use tracing::{debug, info, warn};
pub struct Compiler {
args: Args,
@@ -19,11 +19,10 @@ pub struct Compiler {
impl Compiler {
pub fn new(args: Args) -> Result<Self> {
let target = Target::from_string(&args.target).ok_or_else(|| {
AleccError::UnsupportedTarget {
let target =
Target::from_string(&args.target).ok_or_else(|| AleccError::UnsupportedTarget {
target: args.target.clone(),
}
})?;
})?;
Ok(Self {
args,
@@ -39,9 +38,11 @@ impl Compiler {
});
}
info!("Compiling {} files for target {}",
self.args.input_files.len(),
self.target.as_str());
info!(
"Compiling {} files for target {}",
self.args.input_files.len(),
self.target.as_str()
);
let mut object_files = Vec::new();
let input_files = self.args.input_files.clone(); // Clone to avoid borrow issues
@@ -49,15 +50,19 @@ impl Compiler {
// Process each input file
for input_file in &input_files {
debug!("Processing file: {}", input_file.display());
let extension = input_file.extension()
let extension = input_file
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("");
match extension {
"c" | "cpp" | "cxx" | "cc" | "C" => {
let obj_file = self.compile_source_file(input_file).await?;
if !self.args.compile_only && !self.args.assembly_only && !self.args.preprocess_only {
if !self.args.compile_only
&& !self.args.assembly_only
&& !self.args.preprocess_only
{
object_files.push(obj_file);
}
}
@@ -71,10 +76,15 @@ impl Compiler {
object_files.push(input_file.clone());
}
_ => {
warn!("Unknown file extension for {}, treating as C source",
input_file.display());
warn!(
"Unknown file extension for {}, treating as C source",
input_file.display()
);
let obj_file = self.compile_source_file(input_file).await?;
if !self.args.compile_only && !self.args.assembly_only && !self.args.preprocess_only {
if !self.args.compile_only
&& !self.args.assembly_only
&& !self.args.preprocess_only
{
object_files.push(obj_file);
}
}
@@ -96,19 +106,20 @@ impl Compiler {
info!("Compiling source file: {}", input_file.display());
// Read source file
let source = fs::read_to_string(input_file).await.map_err(|_e| {
AleccError::FileNotFound {
path: input_file.to_string_lossy().to_string(),
}
})?;
let source =
fs::read_to_string(input_file)
.await
.map_err(|_e| AleccError::FileNotFound {
path: input_file.to_string_lossy().to_string(),
})?;
// Preprocessing
let preprocessed = if self.args.preprocess_only {
let output_path = self.get_output_path(input_file, "i")?;
let preprocessed = self.preprocess(&source, input_file).await?;
fs::write(&output_path, preprocessed).await.map_err(|e| {
AleccError::IoError(e)
})?;
fs::write(&output_path, preprocessed)
.await
.map_err(|e| AleccError::IoError(e))?;
return Ok(output_path);
} else {
self.preprocess(&source, input_file).await?
@@ -136,17 +147,17 @@ impl Compiler {
if self.args.assembly_only {
let output_path = self.get_output_path(input_file, "s")?;
fs::write(&output_path, assembly).await.map_err(|e| {
AleccError::IoError(e)
})?;
fs::write(&output_path, assembly)
.await
.map_err(|e| AleccError::IoError(e))?;
return Ok(output_path);
}
// Write assembly to temporary file
let asm_path = self.create_temp_file("s")?;
fs::write(&asm_path, assembly).await.map_err(|e| {
AleccError::IoError(e)
})?;
fs::write(&asm_path, assembly)
.await
.map_err(|e| AleccError::IoError(e))?;
// Assemble
let obj_path = self.assemble_file(&asm_path).await?;
@@ -156,7 +167,7 @@ impl Compiler {
async fn preprocess(&self, source: &str, input_file: &Path) -> Result<String> {
debug!("Preprocessing {}", input_file.display());
// Simple preprocessing - just handle basic #include and #define
let mut preprocessed = String::new();
let mut defines = std::collections::HashMap::new();
@@ -175,7 +186,7 @@ impl Compiler {
// Process source line by line
for line in source.lines() {
let trimmed = line.trim();
if trimmed.starts_with("#include") {
// Handle #include (simplified)
match self.extract_include_file(trimmed) {
@@ -238,7 +249,7 @@ impl Compiler {
}
}
}
if let Some(start) = line.find('<') {
if let Some(end) = line.rfind('>') {
if start != end {
@@ -278,7 +289,7 @@ impl Compiler {
],
Target::Amd64 => vec![
"/usr/include",
"/usr/local/include",
"/usr/local/include",
"/usr/include/x86_64-linux-gnu",
],
Target::Arm64 => vec![
@@ -311,12 +322,12 @@ impl Compiler {
let assembler = match self.target {
Target::I386 => "as",
Target::Amd64 => "as",
Target::Amd64 => "as",
Target::Arm64 => "aarch64-linux-gnu-as",
};
let mut command = Command::new(assembler);
match self.target {
Target::I386 => {
command.args(&["--32"]);
@@ -330,8 +341,9 @@ impl Compiler {
}
command.args(&[
"-o", &obj_path.to_string_lossy(),
&asm_file.to_string_lossy()
"-o",
&obj_path.to_string_lossy(),
&asm_file.to_string_lossy(),
]);
let output = command.output().map_err(|e| AleccError::CodegenError {
@@ -352,7 +364,7 @@ impl Compiler {
info!("Linking {} object files", object_files.len());
let mut linker = Linker::new(self.target);
// Set output path
let output_path = self.args.output.clone().unwrap_or_else(|| {
if self.args.shared {
@@ -401,20 +413,26 @@ impl Compiler {
if let Some(ref output) = self.args.output {
Ok(output.clone())
} else {
let stem = input_file.file_stem()
let stem = input_file
.file_stem()
.ok_or_else(|| AleccError::InvalidArgument {
message: "Invalid input file name".to_string(),
})?;
Ok(PathBuf::from(format!("{}.{}", stem.to_string_lossy(), extension)))
Ok(PathBuf::from(format!(
"{}.{}",
stem.to_string_lossy(),
extension
)))
}
}
/// Reserves a path for a new temporary file with the given `extension`.
///
/// The file name has the form `alecc_<pid>_<index>.<extension>` and lives in
/// the directory returned by `std::env::temp_dir()`. `<pid>` is the current
/// process id and `<index>` is the number of temporary paths recorded so far,
/// so successive calls within one process yield distinct names.
///
/// The path is appended to `self.temp_files` before being returned. This
/// method only builds the path; it does not create anything on disk.
///
/// # Errors
///
/// The current implementation always returns `Ok`.
fn create_temp_file(&mut self, extension: &str) -> Result<PathBuf> {
    // The index is read before the push below, so the first path gets index 0.
    let temp_path = std::env::temp_dir().join(format!(
        "alecc_{}_{}.{}",
        std::process::id(),
        self.temp_files.len(),
        extension
    ));
    self.temp_files.push(temp_path.clone());
    Ok(temp_path)
}
@@ -423,8 +441,11 @@ impl Compiler {
for temp_file in &self.temp_files {
if temp_file.exists() {
if let Err(e) = fs::remove_file(temp_file).await {
warn!("Failed to remove temporary file {}: {}",
temp_file.display(), e);
warn!(
"Failed to remove temporary file {}: {}",
temp_file.display(),
e
);
}
}
}

Ver fichero

@@ -12,36 +12,116 @@ pub enum TokenType {
Identifier(String),
// Keywords
Auto, Break, Case, Char, Const, Continue, Default, Do,
Double, Else, Enum, Extern, Float, For, Goto, If,
Int, Long, Register, Return, Short, Signed, Sizeof, Static,
Struct, Switch, Typedef, Union, Unsigned, Void, Volatile, While,
Auto,
Break,
Case,
Char,
Const,
Continue,
Default,
Do,
Double,
Else,
Enum,
Extern,
Float,
For,
Goto,
If,
Int,
Long,
Register,
Return,
Short,
Signed,
Sizeof,
Static,
Struct,
Switch,
Typedef,
Union,
Unsigned,
Void,
Volatile,
While,
// C++ Keywords
Bool, Class, Explicit, Export, False, Friend, Inline, Mutable,
Namespace, New, Operator, Private, Protected, Public, Template,
This, Throw, True, Try, Typename, Using, Virtual,
Bool,
Class,
Explicit,
Export,
False,
Friend,
Inline,
Mutable,
Namespace,
New,
Operator,
Private,
Protected,
Public,
Template,
This,
Throw,
True,
Try,
Typename,
Using,
Virtual,
// Operators
Plus, Minus, Multiply, Divide, Modulo,
Assign, PlusAssign, MinusAssign, MultiplyAssign, DivideAssign, ModuloAssign,
Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual,
LogicalAnd, LogicalOr, LogicalNot,
BitwiseAnd, BitwiseOr, BitwiseXor, BitwiseNot,
LeftShift, RightShift, LeftShiftAssign, RightShiftAssign,
BitwiseAndAssign, BitwiseOrAssign, BitwiseXorAssign,
Increment, Decrement,
Arrow, Dot, Question, Colon,
Plus,
Minus,
Multiply,
Divide,
Modulo,
Assign,
PlusAssign,
MinusAssign,
MultiplyAssign,
DivideAssign,
ModuloAssign,
Equal,
NotEqual,
Less,
Greater,
LessEqual,
GreaterEqual,
LogicalAnd,
LogicalOr,
LogicalNot,
BitwiseAnd,
BitwiseOr,
BitwiseXor,
BitwiseNot,
LeftShift,
RightShift,
LeftShiftAssign,
RightShiftAssign,
BitwiseAndAssign,
BitwiseOrAssign,
BitwiseXorAssign,
Increment,
Decrement,
Arrow,
Dot,
Question,
Colon,
// Delimiters
LeftParen, RightParen,
LeftBrace, RightBrace,
LeftBracket, RightBracket,
Semicolon, Comma,
LeftParen,
RightParen,
LeftBrace,
RightBrace,
LeftBracket,
RightBracket,
Semicolon,
Comma,
Ellipsis, // ...
// Preprocessor
Hash, HashHash,
Hash,
HashHash,
// Special
Eof,
@@ -100,10 +180,10 @@ impl Lexer {
pub fn tokenize(&mut self) -> crate::error::Result<Vec<Token>> {
let mut tokens = Vec::new();
while !self.is_at_end() {
self.skip_whitespace();
if self.is_at_end() {
break;
}
@@ -128,7 +208,7 @@ impl Lexer {
fn scan_token(&mut self) -> crate::error::Result<Option<TokenType>> {
let c = self.advance();
match c {
'+' => {
if self.match_char('=') {
@@ -365,7 +445,7 @@ impl Lexer {
}
self.advance();
}
Err(crate::error::AleccError::LexError {
line: self.line,
column: self.column,
@@ -375,13 +455,13 @@ impl Lexer {
fn scan_string(&mut self) -> crate::error::Result<Option<TokenType>> {
let mut value = String::new();
while !self.is_at_end() && self.current_char() != '"' {
if self.current_char() == '\n' {
self.line += 1;
self.column = 1;
}
if self.current_char() == '\\' {
self.advance();
if !self.is_at_end() {
@@ -402,7 +482,7 @@ impl Lexer {
self.advance();
}
}
if self.is_at_end() {
return Err(crate::error::AleccError::LexError {
line: self.line,
@@ -410,7 +490,7 @@ impl Lexer {
message: "Unterminated string literal".to_string(),
});
}
self.advance(); // consume closing '"'
Ok(Some(TokenType::StringLiteral(value)))
}
@@ -423,7 +503,7 @@ impl Lexer {
message: "Unterminated character literal".to_string(),
});
}
let c = if self.current_char() == '\\' {
self.advance();
if self.is_at_end() {
@@ -445,9 +525,9 @@ impl Lexer {
} else {
self.current_char()
};
self.advance();
if self.is_at_end() || self.current_char() != '\'' {
return Err(crate::error::AleccError::LexError {
line: self.line,
@@ -455,30 +535,30 @@ impl Lexer {
message: "Unterminated character literal".to_string(),
});
}
self.advance(); // consume closing '\''
Ok(Some(TokenType::CharLiteral(c)))
}
fn scan_number(&mut self) -> crate::error::Result<Option<TokenType>> {
let start = self.position - 1;
while !self.is_at_end() && self.current_char().is_ascii_digit() {
self.advance();
}
let mut is_float = false;
if !self.is_at_end() && self.current_char() == '.' && self.peek().is_ascii_digit() {
is_float = true;
self.advance(); // consume '.'
while !self.is_at_end() && self.current_char().is_ascii_digit() {
self.advance();
}
}
let text = &self.input[start..self.position];
if is_float {
match text.parse::<f64>() {
Ok(value) => Ok(Some(TokenType::FloatLiteral(value))),
@@ -502,7 +582,7 @@ impl Lexer {
fn scan_identifier(&mut self) -> crate::error::Result<Option<TokenType>> {
let start = self.position - 1;
while !self.is_at_end() {
let c = self.current_char();
if c.is_ascii_alphanumeric() || c == '_' {
@@ -511,7 +591,7 @@ impl Lexer {
break;
}
}
let text = &self.input[start..self.position];
let token_type = match text {
"auto" => TokenType::Auto,
@@ -571,7 +651,7 @@ impl Lexer {
"virtual" => TokenType::Virtual,
_ => TokenType::Identifier(text.to_string()),
};
Ok(Some(token_type))
}
}

Ver fichero

@@ -1,9 +1,9 @@
pub mod lexer;
pub mod parser;
pub mod codegen;
pub mod targets;
pub mod compiler;
pub mod cli;
pub mod codegen;
pub mod compiler;
pub mod error;
pub mod optimizer;
pub mod lexer;
pub mod linker;
pub mod optimizer;
pub mod parser;
pub mod targets;

Ver fichero

@@ -1,5 +1,5 @@
use crate::targets::Target;
use crate::error::{AleccError, Result};
use crate::targets::Target;
use std::path::{Path, PathBuf};
use std::process::Command;
@@ -88,7 +88,7 @@ impl Linker {
}
let linker_command = self.build_linker_command()?;
let output = Command::new(&linker_command[0])
.args(&linker_command[1..])
.output()
@@ -108,14 +108,14 @@ impl Linker {
fn build_linker_command(&self) -> Result<Vec<String>> {
let mut command = Vec::new();
// Choose linker based on target
let linker = match self.target {
Target::I386 => "ld",
Target::Amd64 => "ld",
Target::Arm64 => "aarch64-linux-gnu-ld",
};
command.push(linker.to_string());
// Target-specific flags
@@ -238,10 +238,7 @@ impl Linker {
"/usr/lib64",
"/lib64",
],
Target::Arm64 => vec![
"/usr/lib/aarch64-linux-gnu",
"/lib/aarch64-linux-gnu",
],
Target::Arm64 => vec!["/usr/lib/aarch64-linux-gnu", "/lib/aarch64-linux-gnu"],
};
for path in lib_paths {
@@ -274,7 +271,7 @@ impl Linker {
let libgcc_path = String::from_utf8_lossy(&output.stdout);
let libgcc_path = libgcc_path.trim();
if let Some(parent) = Path::new(libgcc_path).parent() {
Ok(parent.to_string_lossy().to_string())
} else {
@@ -286,15 +283,15 @@ impl Linker {
pub async fn link_shared_library(&self, soname: Option<&str>) -> Result<()> {
let mut command = self.build_linker_command()?;
// Remove executable-specific flags
command.retain(|arg| arg != "-pie" && !arg.starts_with("-dynamic-linker"));
// Add shared library flags
if !command.contains(&"-shared".to_string()) {
command.push("-shared".to_string());
}
if let Some(soname) = soname {
command.push("-soname".to_string());
command.push(soname.to_string());
@@ -322,7 +319,7 @@ impl Linker {
// Use ar to create static library
let mut command = vec!["ar".to_string(), "rcs".to_string()];
command.push(self.output_path.to_string_lossy().to_string());
for obj in &self.object_files {
command.push(obj.to_string_lossy().to_string());
}

Ver fichero

@@ -1,19 +1,19 @@
use clap::Parser;
use anyhow::Result;
use tracing::{info, error};
use clap::Parser;
use tracing::{error, info};
mod compiler;
mod lexer;
mod parser;
mod codegen;
mod optimizer;
mod linker;
mod targets;
mod cli;
mod codegen;
mod compiler;
mod error;
mod lexer;
mod linker;
mod optimizer;
mod parser;
mod targets;
use compiler::Compiler;
use cli::Args;
use compiler::Compiler;
#[tokio::main]
async fn main() -> Result<()> {
@@ -21,11 +21,11 @@ async fn main() -> Result<()> {
tracing_subscriber::fmt::init();
let args = Args::parse();
info!("Starting ALECC compiler v{}", env!("CARGO_PKG_VERSION"));
let mut compiler = Compiler::new(args.clone())?;
match compiler.compile().await {
Ok(()) => {
info!("Compilation completed successfully");

Ver fichero

@@ -1,5 +1,5 @@
use crate::parser::Program;
use crate::error::Result;
use crate::parser::Program;
pub struct Optimizer {
level: OptimizationLevel,
@@ -40,9 +40,7 @@ impl Optimizer {
// No optimization
Ok(())
}
OptimizationLevel::Basic => {
self.basic_optimizations(program)
}
OptimizationLevel::Basic => self.basic_optimizations(program),
OptimizationLevel::Moderate => {
self.basic_optimizations(program)?;
self.moderate_optimizations(program)
@@ -67,59 +65,59 @@ impl Optimizer {
/// Runs the basic optimization passes over `program`, in this order:
/// dead code elimination, constant folding, basic strength reduction.
///
/// Each pass is invoked with `?`, so the first pass that reports an error
/// stops the sequence and that error is returned to the caller.
fn basic_optimizations(&mut self, program: &mut Program) -> Result<()> {
// Pass 1: dead code elimination
self.eliminate_dead_code(program)?;
// Pass 2: constant folding
self.fold_constants(program)?;
// Pass 3: basic strength reduction
self.basic_strength_reduction(program)?;
Ok(())
}
/// Runs the moderate optimization passes over `program`, in this order:
/// loop optimizations, basic function inlining, common subexpression
/// elimination.
///
/// This function does not run the basic passes itself. The first pass that
/// reports an error stops the sequence and that error is returned.
fn moderate_optimizations(&mut self, program: &mut Program) -> Result<()> {
// Pass 1: loop optimizations
self.optimize_loops(program)?;
// Pass 2: function inlining (basic)
self.inline_small_functions(program)?;
// Pass 3: common subexpression elimination
self.eliminate_common_subexpressions(program)?;
Ok(())
}
/// Runs the aggressive optimization passes over `program`, in this order:
/// advanced loop optimizations, aggressive inlining, inter-procedural
/// optimizations, auto-vectorization.
///
/// The first pass that reports an error stops the sequence and that error
/// is returned to the caller.
fn aggressive_optimizations(&mut self, program: &mut Program) -> Result<()> {
// Pass 1: advanced loop optimizations
self.advanced_loop_optimizations(program)?;
// Pass 2: aggressive function inlining
self.aggressive_inlining(program)?;
// Pass 3: inter-procedural optimizations
self.interprocedural_optimizations(program)?;
// Pass 4: vectorization
self.auto_vectorization(program)?;
Ok(())
}
/// Runs the code-size optimization passes over `program`, in this order:
/// size-oriented code selection, then merging of identical functions.
///
/// The first pass that reports an error stops the sequence and that error
/// is returned to the caller.
fn size_optimizations(&mut self, program: &mut Program) -> Result<()> {
// Pass 1: prefer smaller code sequences
self.optimize_for_size(program)?;
// Pass 2: merge identical functions
self.merge_identical_functions(program)?;
Ok(())
}
/// Runs the single "ultra" size optimization pass over `program`.
///
/// Any error reported by that pass is returned to the caller unchanged.
fn aggressive_size_optimizations(&mut self, program: &mut Program) -> Result<()> {
// More aggressive size optimizations that might impact performance
// (i.e. this pass is allowed to trade speed for smaller output).
self.ultra_size_optimizations(program)?;
Ok(())
}

Ver fichero

@@ -1,5 +1,5 @@
use crate::lexer::{Token, TokenType};
use crate::error::{AleccError, Result};
use crate::lexer::{Token, TokenType};
use std::collections::HashMap;
#[derive(Debug, Clone)]
@@ -97,30 +97,59 @@ pub enum Expression {
/// Operators that combine a left and a right operand expression.
///
/// Each variant is listed exactly once; declaring a variant twice is a
/// compile error.
#[derive(Debug, Clone)]
pub enum BinaryOperator {
    // Arithmetic
    Add,
    Subtract,
    Multiply,
    Divide,
    Modulo,
    // Equality and ordering comparisons
    Equal,
    NotEqual,
    Less,
    Greater,
    LessEqual,
    GreaterEqual,
    // Logical
    LogicalAnd,
    LogicalOr,
    // Bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    // Shifts
    LeftShift,
    RightShift,
}
/// Operators that apply to a single operand expression.
///
/// Each variant is listed exactly once; declaring a variant twice is a
/// compile error.
#[derive(Debug, Clone)]
pub enum UnaryOperator {
    // Sign and negation
    Plus,
    Minus,
    LogicalNot,
    BitwiseNot,
    // Increment / decrement, in prefix and postfix position
    PreIncrement,
    PostIncrement,
    PreDecrement,
    PostDecrement,
    // Pointer operators
    AddressOf,
    Dereference,
}
/// Plain and compound assignment operators.
///
/// Each variant is listed exactly once; declaring a variant twice is a
/// compile error. The variants carrying `#[allow(dead_code)]` keep that
/// attribute so the unused-variant lint stays silenced for them.
#[derive(Debug, Clone)]
pub enum AssignmentOperator {
    Assign,
    PlusAssign,
    MinusAssign,
    MultiplyAssign,
    DivideAssign,
    #[allow(dead_code)]
    ModuloAssign,
    #[allow(dead_code)]
    BitwiseAndAssign,
    #[allow(dead_code)]
    BitwiseOrAssign,
    #[allow(dead_code)]
    BitwiseXorAssign,
    #[allow(dead_code)]
    LeftShiftAssign,
    #[allow(dead_code)]
    RightShiftAssign,
}
#[derive(Debug, Clone)]
@@ -235,9 +264,11 @@ impl Parser {
} else {
let storage_class = self.parse_storage_class();
let base_type = self.parse_type()?;
if self.check(&TokenType::LeftParen) ||
(self.check(&TokenType::Identifier("".to_string())) && self.peek_ahead(1)?.token_type == TokenType::LeftParen) {
if self.check(&TokenType::LeftParen)
|| (self.check(&TokenType::Identifier("".to_string()))
&& self.peek_ahead(1)?.token_type == TokenType::LeftParen)
{
self.parse_function_declaration(storage_class, base_type)
} else {
self.parse_variable_declaration(storage_class, base_type)
@@ -300,7 +331,7 @@ impl Parser {
};
let mut fields = Vec::new();
if self.match_token(&TokenType::LeftBrace) {
while !self.check(&TokenType::RightBrace) && !self.is_at_end() {
let field_type = self.parse_type()?;
@@ -313,11 +344,14 @@ impl Parser {
message: "Expected field name".to_string(),
});
};
self.consume(&TokenType::Semicolon, "Expected ';' after field declaration")?;
self.consume(
&TokenType::Semicolon,
"Expected ';' after field declaration",
)?;
fields.push((field_name, field_type));
}
self.consume(&TokenType::RightBrace, "Expected '}' after struct body")?;
}
@@ -337,7 +371,7 @@ impl Parser {
};
let mut fields = Vec::new();
if self.match_token(&TokenType::LeftBrace) {
while !self.check(&TokenType::RightBrace) && !self.is_at_end() {
let field_type = self.parse_type()?;
@@ -350,11 +384,14 @@ impl Parser {
message: "Expected field name".to_string(),
});
};
self.consume(&TokenType::Semicolon, "Expected ';' after field declaration")?;
self.consume(
&TokenType::Semicolon,
"Expected ';' after field declaration",
)?;
fields.push((field_name, field_type));
}
self.consume(&TokenType::RightBrace, "Expected '}' after union body")?;
}
@@ -374,10 +411,11 @@ impl Parser {
let mut variants = Vec::new();
let mut current_value = 0i64;
if self.match_token(&TokenType::LeftBrace) {
while !self.check(&TokenType::RightBrace) && !self.is_at_end() {
let variant_name = if let TokenType::Identifier(name) = &self.advance()?.token_type {
let variant_name = if let TokenType::Identifier(name) = &self.advance()?.token_type
{
name.clone()
} else {
return Err(AleccError::ParseError {
@@ -386,7 +424,7 @@ impl Parser {
message: "Expected enum variant name".to_string(),
});
};
if self.match_token(&TokenType::Assign) {
if let TokenType::IntegerLiteral(value) = &self.advance()?.token_type {
current_value = *value;
@@ -398,15 +436,15 @@ impl Parser {
});
}
}
variants.push((variant_name, current_value));
current_value += 1;
if !self.check(&TokenType::RightBrace) {
self.consume(&TokenType::Comma, "Expected ',' between enum variants")?;
}
}
self.consume(&TokenType::RightBrace, "Expected '}' after enum body")?;
}
@@ -415,11 +453,13 @@ impl Parser {
// Helper methods
/// Returns the token at the parser's current position without consuming it.
///
/// # Errors
/// Returns `AleccError::ParseError` with the message
/// "Unexpected end of input" when `self.current` is past the last token.
/// No token is available in that case, so line and column are reported as 0.
fn current_token(&self) -> Result<&Token> {
    // A single lookup; the body previously contained the same expression twice.
    self.tokens
        .get(self.current)
        .ok_or_else(|| AleccError::ParseError {
            line: 0,
            column: 0,
            message: "Unexpected end of input".to_string(),
        })
}
fn advance(&mut self) -> Result<&Token> {
@@ -445,32 +485,39 @@ impl Parser {
}
/// Returns the token immediately before the parser's current position.
///
/// # Errors
/// Returns `AleccError::ParseError` with the message "No previous token"
/// when there is no such token: either the parser is still at position 0,
/// or the preceding index is outside the token list. Line and column are
/// reported as 0 because no token is available to take them from.
fn previous(&self) -> Result<&Token> {
    // `checked_sub` turns the position-0 case into `None` instead of
    // underflowing `usize` (which panics in debug builds).
    self.current
        .checked_sub(1)
        .and_then(|index| self.tokens.get(index))
        .ok_or_else(|| AleccError::ParseError {
            line: 0,
            column: 0,
            message: "No previous token".to_string(),
        })
}
/// Returns the token `offset` positions after the current one without
/// consuming anything (`offset == 0` is the current token).
///
/// # Errors
/// Returns `AleccError::ParseError` with the message
/// "Unexpected end of input" when that position is past the last token.
/// Line and column are reported as 0 because no token is available there.
fn peek_ahead(&self, offset: usize) -> Result<&Token> {
    // `checked_add` maps an overflowing index to `None`, so an absurdly
    // large offset yields the same error as any other out-of-range lookup.
    self.current
        .checked_add(offset)
        .and_then(|index| self.tokens.get(index))
        .ok_or_else(|| AleccError::ParseError {
            line: 0,
            column: 0,
            message: "Unexpected end of input".to_string(),
        })
}
/// Reports whether the parser has no more tokens to consume.
///
/// This is true when the current position is past the end of the token
/// list, or when the token at the current position is `TokenType::Eof`.
fn is_at_end(&self) -> bool {
    match self.tokens.get(self.current) {
        // Position is outside the token list.
        None => true,
        // An explicit end-of-file token also counts as the end.
        Some(token) => matches!(token.token_type, TokenType::Eof),
    }
}
/// Reports whether the current token is of the same kind as `token_type`,
/// without consuming it.
///
/// Only the enum variant is compared (via `std::mem::discriminant`); any
/// payload is ignored, so `TokenType::Identifier(String::new())` matches an
/// identifier with any name. Returns `false` once the parser is at the end
/// of input.
fn check(&self, token_type: &TokenType) -> bool {
    if self.is_at_end() {
        return false;
    }

    // Non-panicking lookup: a missing token is simply "no match".
    self.tokens.get(self.current).map_or(false, |token| {
        std::mem::discriminant(&token.token_type) == std::mem::discriminant(token_type)
    })
}
@@ -521,12 +568,16 @@ impl Parser {
message: "Expected typedef name".to_string(),
});
};
self.consume(&TokenType::Semicolon, "Expected ';' after typedef")?;
Ok(Declaration::TypeDef(name, base_type))
}
fn parse_function_declaration(&mut self, _storage: StorageClass, return_type: Type) -> Result<Declaration> {
fn parse_function_declaration(
&mut self,
_storage: StorageClass,
return_type: Type,
) -> Result<Declaration> {
let name = if let TokenType::Identifier(name) = &self.advance()?.token_type {
name.clone()
} else {
@@ -538,16 +589,16 @@ impl Parser {
};
self.consume(&TokenType::LeftParen, "Expected '(' after function name")?;
let mut parameters = Vec::new();
let mut is_variadic = false;
while !self.check(&TokenType::RightParen) && !self.is_at_end() {
if self.match_token(&TokenType::Ellipsis) {
is_variadic = true;
break;
}
let param_type = self.parse_type()?;
let param_name = if let TokenType::Identifier(name) = &self.advance()?.token_type {
name.clone()
@@ -558,21 +609,24 @@ impl Parser {
message: "Expected parameter name".to_string(),
});
};
parameters.push((param_name, param_type));
if !self.check(&TokenType::RightParen) {
self.consume(&TokenType::Comma, "Expected ',' between parameters")?;
}
}
self.consume(&TokenType::RightParen, "Expected ')' after parameters")?;
let body = if self.check(&TokenType::LeftBrace) {
self.advance()?; // Consume the LeftBrace
self.parse_block_statement()?
} else {
self.consume(&TokenType::Semicolon, "Expected ';' after function declaration")?;
self.consume(
&TokenType::Semicolon,
"Expected ';' after function declaration",
)?;
Statement::Block(Vec::new()) // Forward declaration
};
@@ -588,7 +642,11 @@ impl Parser {
}))
}
fn parse_variable_declaration(&mut self, _storage: StorageClass, var_type: Type) -> Result<Declaration> {
fn parse_variable_declaration(
&mut self,
_storage: StorageClass,
var_type: Type,
) -> Result<Declaration> {
let name = if let TokenType::Identifier(name) = &self.advance()?.token_type {
name.clone()
} else {
@@ -605,8 +663,11 @@ impl Parser {
None
};
self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?;
self.consume(
&TokenType::Semicolon,
"Expected ';' after variable declaration",
)?;
Ok(Declaration::Variable(name, var_type, initializer))
}
@@ -616,7 +677,7 @@ impl Parser {
while !self.check(&TokenType::RightBrace) && !self.is_at_end() {
statements.push(self.parse_statement()?);
}
self.consume(&TokenType::RightBrace, "Expected '}'")?;
Ok(Statement::Block(statements))
}
@@ -676,8 +737,11 @@ impl Parser {
None
};
self.consume(&TokenType::Semicolon, "Expected ';' after variable declaration")?;
self.consume(
&TokenType::Semicolon,
"Expected ';' after variable declaration",
)?;
Ok(Statement::Declaration {
name,
var_type,
@@ -695,14 +759,14 @@ impl Parser {
self.consume(&TokenType::LeftParen, "Expected '(' after 'if'")?;
let condition = self.parse_expression()?;
self.consume(&TokenType::RightParen, "Expected ')' after if condition")?;
let then_stmt = Box::new(self.parse_statement()?);
let else_stmt = if self.match_token(&TokenType::Else) {
Some(Box::new(self.parse_statement()?))
} else {
None
};
Ok(Statement::If {
condition,
then_stmt,
@@ -715,39 +779,39 @@ impl Parser {
let condition = self.parse_expression()?;
self.consume(&TokenType::RightParen, "Expected ')' after while condition")?;
let body = Box::new(self.parse_statement()?);
Ok(Statement::While { condition, body })
}
fn parse_for_statement(&mut self) -> Result<Statement> {
self.consume(&TokenType::LeftParen, "Expected '(' after 'for'")?;
let init = if self.check(&TokenType::Semicolon) {
None
} else {
Some(Box::new(self.parse_statement()?))
};
if init.is_none() {
self.advance()?; // consume semicolon
}
let condition = if self.check(&TokenType::Semicolon) {
None
} else {
Some(self.parse_expression()?)
};
self.consume(&TokenType::Semicolon, "Expected ';' after for condition")?;
let increment = if self.check(&TokenType::RightParen) {
None
} else {
Some(self.parse_expression()?)
};
self.consume(&TokenType::RightParen, "Expected ')' after for clauses")?;
let body = Box::new(self.parse_statement()?);
Ok(Statement::For {
init,
condition,
@@ -757,10 +821,18 @@ impl Parser {
}
/// Reports whether `token_type` is one of the built-in type keywords:
/// `int`, `float`, `double`, `char`, `void`, `short`, `long`, `signed`
/// or `unsigned`.
fn is_type(&self, token_type: &TokenType) -> bool {
    // One `matches!` expression; the body previously contained it twice.
    matches!(
        token_type,
        TokenType::Int
            | TokenType::Float
            | TokenType::Double
            | TokenType::Char
            | TokenType::Void
            | TokenType::Short
            | TokenType::Long
            | TokenType::Signed
            | TokenType::Unsigned
    )
}
fn parse_expression(&mut self) -> Result<Expression> {
@@ -769,7 +841,7 @@ impl Parser {
fn parse_assignment(&mut self) -> Result<Expression> {
let expr = self.parse_logical_or()?;
if self.match_token(&TokenType::Assign) {
let value = self.parse_assignment()?; // Right associative
return Ok(Expression::Assignment {
@@ -806,13 +878,13 @@ impl Parser {
value: Box::new(value),
});
}
Ok(expr)
}
fn parse_logical_or(&mut self) -> Result<Expression> {
let mut expr = self.parse_logical_and()?;
while self.match_token(&TokenType::LogicalOr) {
let operator = BinaryOperator::LogicalOr;
let right = self.parse_logical_and()?;
@@ -822,13 +894,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_logical_and(&mut self) -> Result<Expression> {
let mut expr = self.parse_bitwise_or()?;
while self.match_token(&TokenType::LogicalAnd) {
let operator = BinaryOperator::LogicalAnd;
let right = self.parse_bitwise_or()?;
@@ -838,13 +910,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_bitwise_or(&mut self) -> Result<Expression> {
let mut expr = self.parse_bitwise_xor()?;
while self.match_token(&TokenType::BitwiseOr) {
let operator = BinaryOperator::BitwiseOr;
let right = self.parse_bitwise_xor()?;
@@ -854,13 +926,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_bitwise_xor(&mut self) -> Result<Expression> {
let mut expr = self.parse_bitwise_and()?;
while self.match_token(&TokenType::BitwiseXor) {
let operator = BinaryOperator::BitwiseXor;
let right = self.parse_bitwise_and()?;
@@ -870,13 +942,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_bitwise_and(&mut self) -> Result<Expression> {
let mut expr = self.parse_equality()?;
while self.match_token(&TokenType::BitwiseAnd) {
let operator = BinaryOperator::BitwiseAnd;
let right = self.parse_equality()?;
@@ -886,13 +958,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_equality(&mut self) -> Result<Expression> {
let mut expr = self.parse_comparison()?;
while self.match_tokens(&[TokenType::Equal, TokenType::NotEqual]) {
let operator = match self.previous()?.token_type {
TokenType::Equal => BinaryOperator::Equal,
@@ -906,15 +978,19 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_comparison(&mut self) -> Result<Expression> {
let mut expr = self.parse_shift()?;
while self.match_tokens(&[TokenType::Greater, TokenType::GreaterEqual,
TokenType::Less, TokenType::LessEqual]) {
while self.match_tokens(&[
TokenType::Greater,
TokenType::GreaterEqual,
TokenType::Less,
TokenType::LessEqual,
]) {
let operator = match self.previous()?.token_type {
TokenType::Greater => BinaryOperator::Greater,
TokenType::GreaterEqual => BinaryOperator::GreaterEqual,
@@ -929,13 +1005,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_shift(&mut self) -> Result<Expression> {
let mut expr = self.parse_term()?;
while self.match_tokens(&[TokenType::LeftShift, TokenType::RightShift]) {
let operator = match self.previous()?.token_type {
TokenType::LeftShift => BinaryOperator::LeftShift,
@@ -949,13 +1025,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_term(&mut self) -> Result<Expression> {
let mut expr = self.parse_factor()?;
while self.match_tokens(&[TokenType::Minus, TokenType::Plus]) {
let operator = match self.previous()?.token_type {
TokenType::Minus => BinaryOperator::Subtract,
@@ -969,13 +1045,13 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_factor(&mut self) -> Result<Expression> {
let mut expr = self.parse_unary()?;
while self.match_tokens(&[TokenType::Divide, TokenType::Multiply, TokenType::Modulo]) {
let operator = match self.previous()?.token_type {
TokenType::Divide => BinaryOperator::Divide,
@@ -990,12 +1066,21 @@ impl Parser {
right: Box::new(right),
};
}
Ok(expr)
}
fn parse_unary(&mut self) -> Result<Expression> {
if self.match_tokens(&[TokenType::LogicalNot, TokenType::Minus, TokenType::Plus, TokenType::Increment, TokenType::Decrement, TokenType::BitwiseAnd, TokenType::Multiply, TokenType::BitwiseNot]) {
if self.match_tokens(&[
TokenType::LogicalNot,
TokenType::Minus,
TokenType::Plus,
TokenType::Increment,
TokenType::Decrement,
TokenType::BitwiseAnd,
TokenType::Multiply,
TokenType::BitwiseNot,
]) {
let operator = match self.previous()?.token_type {
TokenType::LogicalNot => UnaryOperator::LogicalNot,
TokenType::Minus => UnaryOperator::Minus,
@@ -1013,13 +1098,13 @@ impl Parser {
operand: Box::new(right),
});
}
self.parse_call()
}
fn parse_call(&mut self) -> Result<Expression> {
let mut expr = self.parse_primary()?;
loop {
if self.match_token(&TokenType::LeftParen) {
expr = self.finish_call(expr)?;
@@ -1045,13 +1130,13 @@ impl Parser {
break;
}
}
Ok(expr)
}
fn finish_call(&mut self, callee: Expression) -> Result<Expression> {
let mut arguments = Vec::new();
if !self.check(&TokenType::RightParen) {
loop {
arguments.push(self.parse_expression()?);
@@ -1060,9 +1145,9 @@ impl Parser {
}
}
}
self.consume(&TokenType::RightParen, "Expected ')' after arguments")?;
Ok(Expression::Call {
function: Box::new(callee),
arguments,
@@ -1075,7 +1160,7 @@ impl Parser {
self.consume(&TokenType::RightParen, "Expected ')' after expression")?;
return Ok(expr);
}
let token = self.advance()?;
match &token.token_type {
TokenType::IntegerLiteral(value) => Ok(Expression::IntegerLiteral(*value)),

Ver fichero

@@ -19,13 +19,13 @@ impl Target {
/// Returns the target matching the architecture this binary was compiled for.
///
/// `x86` maps to `I386`, `x86_64` to `Amd64` and `aarch64` to `Arm64`.
/// Any other architecture falls back to `Amd64`.
pub fn native() -> Self {
    // `cfg!` is evaluated at compile time, so exactly one branch is live
    // in any given build.
    if cfg!(target_arch = "x86") {
        Target::I386
    } else if cfg!(target_arch = "aarch64") {
        Target::Arm64
    } else {
        // Covers x86_64 as well as the default fallback for other hosts.
        Target::Amd64
    }
}
@@ -77,7 +77,7 @@ impl Target {
/// Returns the linker command line prefix used for this target.
///
/// The x86 targets use `ld` with an explicit `-m` emulation flag; the
/// ARM64 target uses the `aarch64-linux-gnu-ld` cross linker.
pub fn linker(&self) -> &'static str {
    // One arm per target; a repeated `Amd64` arm would be unreachable.
    match self {
        Target::I386 => "ld -m elf_i386",
        Target::Amd64 => "ld -m elf_x86_64",
        Target::Arm64 => "aarch64-linux-gnu-ld",
    }
}
@@ -113,9 +113,9 @@ impl Target {
/// Calling conventions, one per supported architecture.
///
/// Each variant is listed exactly once; declaring a variant twice is a
/// compile error.
// `dead_code` is allowed so the unused-item lint stays silenced for this type.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
pub enum CallingConvention {
    /// Used for x86-32.
    Cdecl,
    /// Used for x86-64.
    SystemV,
    /// Used for ARM64.
    Aapcs64,
}
#[allow(dead_code)]
@@ -131,8 +131,15 @@ impl RegisterSet {
/// Returns the general-purpose register names for this register set.
///
/// The lists are fixed tables: six registers for 32-bit x86, fourteen for
/// x86-64 and `x0` through `x28` for AArch64. Stack and frame pointer
/// registers are not part of any list.
pub fn general_purpose_registers(&self) -> &'static [&'static str] {
    // One arm per register set; repeated arms would be unreachable.
    match self {
        RegisterSet::X86_32 => &["eax", "ebx", "ecx", "edx", "esi", "edi"],
        RegisterSet::X86_64 => &[
            "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13",
            "r14", "r15",
        ],
        RegisterSet::Aarch64 => &[
            "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12",
            "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24",
            "x25", "x26", "x27", "x28",
        ],
    }
}

Ver fichero

@@ -1,11 +1,11 @@
#[cfg(test)]
mod tests {
use alecc::cli::Args;
use alecc::codegen::CodeGenerator;
use alecc::compiler::Compiler;
use alecc::lexer::{Lexer, TokenType};
use alecc::parser::Parser;
use alecc::codegen::CodeGenerator;
use alecc::targets::Target;
use alecc::compiler::Compiler;
use alecc::cli::Args;
use std::path::PathBuf;
#[test]
@@ -13,7 +13,7 @@ mod tests {
let input = "int main() { return 0; }".to_string();
let mut lexer = Lexer::new(input);
let tokens = lexer.tokenize().unwrap();
assert!(!tokens.is_empty());
assert!(matches!(tokens[0].token_type, TokenType::Int));
}
@@ -23,8 +23,11 @@ mod tests {
let input = "42 3.14 'a' \"hello\"".to_string();
let mut lexer = Lexer::new(input);
let tokens = lexer.tokenize().unwrap();
assert!(matches!(tokens[0].token_type, TokenType::IntegerLiteral(42)));
assert!(matches!(
tokens[0].token_type,
TokenType::IntegerLiteral(42)
));
assert!(matches!(tokens[1].token_type, TokenType::FloatLiteral(_)));
assert!(matches!(tokens[2].token_type, TokenType::CharLiteral('a')));
assert!(matches!(tokens[3].token_type, TokenType::StringLiteral(_)));
@@ -35,7 +38,7 @@ mod tests {
let input = "+ - * / == != < > <= >=".to_string();
let mut lexer = Lexer::new(input);
let tokens = lexer.tokenize().unwrap();
assert!(matches!(tokens[0].token_type, TokenType::Plus));
assert!(matches!(tokens[1].token_type, TokenType::Minus));
assert!(matches!(tokens[2].token_type, TokenType::Multiply));
@@ -49,9 +52,10 @@ mod tests {
let input = "int x; // comment\n/* block comment */ int y;".to_string();
let mut lexer = Lexer::new(input);
let tokens = lexer.tokenize().unwrap();
// Comments should be filtered out
let identifier_count = tokens.iter()
let identifier_count = tokens
.iter()
.filter(|t| matches!(t.token_type, TokenType::Identifier(_)))
.count();
assert_eq!(identifier_count, 2); // x and y
@@ -64,7 +68,7 @@ mod tests {
let tokens = lexer.tokenize().unwrap();
let mut parser = Parser::new(tokens);
let program = parser.parse().unwrap();
assert_eq!(program.functions.len(), 1);
assert_eq!(program.functions[0].name, "main");
}
@@ -92,10 +96,10 @@ mod tests {
let tokens = lexer.tokenize().unwrap();
let mut parser = Parser::new(tokens);
let program = parser.parse().unwrap();
let mut codegen = CodeGenerator::new(Target::Amd64);
let assembly = codegen.generate(&program).unwrap();
assert!(assembly.contains("main:"));
assert!(assembly.contains("ret"));
}
@@ -136,13 +140,13 @@ mod tests {
/// A lexer error's display text must mention both its line and its column.
#[test]
fn test_error_types() {
    use alecc::error::AleccError;

    // Render the error once and inspect the resulting text.
    let rendered = AleccError::LexError {
        line: 1,
        column: 5,
        message: "Unexpected character".to_string(),
    }
    .to_string();

    assert!(rendered.contains("line 1"));
    assert!(rendered.contains("column 5"));
}