C++调用Rust

阅读量: searchstar 2022-09-02 13:36:46
Categories: Tags:

相关:C语言调用Rust

场景

在C++中有一个类想用Rust的std::collections::BinaryHeap维护:

// Non-owning view over a byte buffer. Instances are ordered by length first
// and, for equal lengths, by the byte-wise content of the buffer.
class A {
public:
	// Wraps `size` bytes starting at `data`; only the pointer is stored.
	A(const char* data, size_t size) : data_(data), size_(size) {}

	// Pointer to the first byte of the viewed buffer.
	const char* data() const { return data_; }

	// Number of bytes in the viewed buffer.
	size_t size() const { return size_; }

	// Strict ordering: a shorter buffer sorts before a longer one; buffers of
	// equal length are ordered by memcmp over their contents.
	bool operator < (const A& rhs) const {
		if (size_ != rhs.size_) {
			return size_ < rhs.size_;
		}
		return memcmp(data_, rhs.data_, size_) < 0;
	}
private:
	const char* data_;
	size_t size_;
};

并且提供这些接口:

// Creates a heap and returns an opaque handle to it.
void* Create();
// Inserts `a` into the heap identified by `heap`.
void Push(void* heap, A a);
// Removes one element from the heap identified by `heap` and returns it.
A Pop(void* heap);
// Releases the heap identified by `heap`.
void Destroy(void* heap);

思路

Rust只能提供C语言的接口,结构体也只认C语言的结构体。

因此首先专门另写一份C++代码,调用Rust提供的C接口实现所需的C++接口,并且将C++提供的操作封装成C语言接口给Rust调用。

代码

结构

├── heap
│   ├── Cargo.lock
│   ├── Cargo.toml
│   ├── cpp
│   │   ├── cpp_to_rust.cpp
│   │   ├── c_struct.h
│   │   └── rust_to_cpp.cpp
│   ├── include
│   │   └── heap.h
│   ├── Makefile
│   ├── src
│   │   └── lib.rs
│   └── target
├── test.cpp
└── test.h

C++使用端

test.h:

#ifndef TEST_H_
#define TEST_H_

#include <cstddef>
#include <cstring>

// Non-owning view over a byte buffer. Instances are ordered by length first
// and, for equal lengths, by the byte-wise content of the buffer.
class A {
public:
	// Wraps `size` bytes starting at `data`; only the pointer is stored.
	A(const char* data, size_t size) : data_(data), size_(size) {}

	// Pointer to the first byte of the viewed buffer.
	const char* data() const { return data_; }

	// Number of bytes in the viewed buffer.
	size_t size() const { return size_; }

	// Strict ordering: a shorter buffer sorts before a longer one; buffers of
	// equal length are ordered by memcmp over their contents.
	bool operator < (const A& rhs) const {
		if (size_ != rhs.size_) {
			return size_ < rhs.size_;
		}
		return memcmp(data_, rhs.data_, size_) < 0;
	}
private:
	const char* data_;
	size_t size_;
};

#endif // TEST_H_

test.cpp:

#include <cstdio>
#include <cstring>

#include "test.h"
#include "heap.h"

using namespace std;

int main() {
	void* heap = Create();
	Push(heap, A("233", 3));
	Push(heap, A("2333", 4));
	Push(heap, A("332", 3));
	puts(Pop(heap).data());
	puts(Pop(heap).data());
	puts(Pop(heap).data());
	Destroy(heap);

	return 0;
}

新建Rust项目

cargo new heap --lib

heap/Cargo.toml中加入:

# Build the crate as a C-compatible static library (libheap.a).
[lib]
crate-type = ["staticlib"]

heap/src/lib.rs:

use std::collections::BinaryHeap;
use std::cmp::Ordering;
use std::os::raw::c_char;

extern "C" {
    /// Externally provided strict "less than" predicate over two `C_A` values.
    /// Returns `true` when `*lhs` orders before `*rhs`.
    fn compare(lhs: *const C_A, rhs: *const C_A) -> bool;
}

/// C-layout pair of a byte pointer and a length, passed by value across the
/// FFI boundary. Field order and types must match the C-side struct exactly.
#[repr(C)]
#[derive(Eq)]
pub struct C_A {
    /// Pointer to the first byte; this struct never dereferences or frees it.
    data: *const c_char,
    /// Length value carried alongside `data`.
    size: usize,
}

impl Ord for C_A {
    /// Derives a total order from the external `compare` predicate:
    /// `Less` if `self < rhs`, `Greater` if `rhs < self`, otherwise `Equal`.
    fn cmp(&self, rhs: &C_A) -> Ordering {
        let lhs_ptr = self as *const C_A;
        let rhs_ptr = rhs as *const C_A;

        // Both pointers come from live references, so they are valid for the
        // duration of each call.
        if unsafe { compare(lhs_ptr, rhs_ptr) } {
            return Ordering::Less;
        }
        // Only asked when the first comparison was false.
        if unsafe { compare(rhs_ptr, lhs_ptr) } {
            return Ordering::Greater;
        }
        Ordering::Equal
    }
}

impl PartialOrd for C_A {
    /// Always comparable: delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, rhs: &C_A) -> Option<Ordering> {
        let ordering = Ord::cmp(self, rhs);
        Some(ordering)
    }
}

impl PartialEq for C_A {
    /// Two values are equal exactly when `Ord::cmp` reports `Equal`.
    fn eq(&self, rhs: &C_A) -> bool {
        matches!(Ord::cmp(self, rhs), Ordering::Equal)
    }
}

/// Allocates an empty `BinaryHeap<C_A>` on the Rust heap and hands ownership
/// to the caller as a raw pointer. Release it with `__rust_destroy`.
#[no_mangle]
pub extern "C" fn __rust_create() -> *mut BinaryHeap<C_A> {
    let boxed: Box<BinaryHeap<C_A>> = Box::new(BinaryHeap::new());
    Box::into_raw(boxed)
}

/// Inserts `a` into the heap behind `heap`.
///
/// `heap` must be a live pointer obtained from `__rust_create`; it is
/// dereferenced without any check.
#[no_mangle]
pub extern "C" fn __rust_push(heap: *mut BinaryHeap<C_A>, a: C_A) {
    let target: &mut BinaryHeap<C_A> = unsafe { &mut *heap };
    target.push(a)
}

#[no_mangle]
pub extern "C" fn __rust_pop(heap: *mut BinaryHeap<C_A>) -> C_A {
    let heap = unsafe { &mut *heap };
    // Use unwrap_unchecked to avoid panic. C++ does not know how to panic anyway.
    unsafe { heap.pop().unwrap_unchecked() }
}

/// Frees a heap previously returned by `__rust_create`.
///
/// Passing a null pointer is a no-op. Any other pointer must have come from
/// `__rust_create` and must not be used again after this call.
#[no_mangle]
pub extern "C" fn __rust_destroy(heap: *mut BinaryHeap<C_A>) {
    if heap.is_null() {
        return;
    }
    // Rebuild the Box and drop it explicitly; `Box::from_raw` is `#[must_use]`,
    // so silently discarding its result triggers a compiler warning.
    drop(unsafe { Box::from_raw(heap) });
}

#[repr(C)]: repr的意思是representation。repr(C)表示用C语言的方式来组织这个结构体,这样这个结构体就可以在C语言和Rust之间传递了。

参考:https://stackoverflow.com/questions/24105186/can-i-call-c-or-c-functions-from-rust-code

C语言结构体

Rust只兼容C语言结构体,所以要先把C++结构体变成C语言结构体,才能传给Rust用。同样,Rust传出来的结构体也只能是C语言结构体,要转成对应的C++结构体。

heap/cpp/c_struct.h:

// Plain C-layout struct used to pass values between C++ and Rust.
// The guard macro tested by #ifndef and the one defined below must be spelled
// identically, otherwise a second inclusion redefines struct CA.
#ifndef C_STRUCT_H_
#define C_STRUCT_H_

#include <cstddef>

extern "C" {
	// Pointer/length pair; field order and types must match the Rust side.
	struct CA {
		const char* data;
		size_t size;
	};
}

#endif // C_STRUCT_H_

这里的struct CA就是Rust里的struct C_A。这里特意用了不同的名字,说明两者名字不一定要相同。

参考:https://stackoverflow.com/questions/62126501/how-to-pass-a-c-struct-to-rust

将C++操作封装成C语言接口

BinaryHeap需要用到比较操作。但是A的比较操作是operator <,Rust显然不能直接用。因此需要将这个操作封装成C语言接口,Rust才能用。

heap/cpp/cpp_to_rust.cpp:

#include "test.h"
#include "c_struct.h"

extern "C" {
	bool compare(const CA* ca, const CA* cb);
}

bool compare(const CA* ca, const CA* cb) {
	A a(ca->data, ca->size);
	A b(cb->data, cb->size);
	return a < b;
}

使用Rust接口实现所需的C++接口

heap/cpp/rust_to_cpp.cpp:

#include "test.h"
#include "c_struct.h"

#include <cstddef>

// Entry points exported by the Rust static library. The heap handle is
// treated as an opaque pointer on the C++ side.
extern "C" {
	void* __rust_create();
	void __rust_push(void* heap, CA a);
	CA __rust_pop(void* heap);
	void __rust_destroy(void* heap);
}


// Creates a heap by forwarding to __rust_create and returns its opaque handle.
void* Create() {
	return __rust_create();
}

void Push(void* heap, A a) {
	struct CA ca{
			.data = a.data(),
			.size = a.size(),
	};
	__rust_push(heap, ca);
}

// Pops one element from `heap` via __rust_pop and converts it back into an A.
A Pop(void* heap) {
	const CA popped = __rust_pop(heap);
	const A result(popped.data, popped.size);
	return result;
}

// Releases `heap` by forwarding it to __rust_destroy.
void Destroy(void* heap) {
	__rust_destroy(heap);
}

将实现的C++接口放进头文件

heap/include/heap.h:

#ifndef HEAP_H_
#define HEAP_H_

#include "test.h"

// Creates a heap and returns an opaque handle to it.
void* Create();
// Inserts `a` into the heap identified by `heap`.
void Push(void* heap, A a);
// Removes one element from the heap identified by `heap` and returns it.
A Pop(void* heap);
// Releases the heap identified by `heap`.
void Destroy(void* heap);

#endif // HEAP_H_

Makefile

heap/Makefile:

# INCLUDE_DIR must name the directory that contains test.h.
ifndef INCLUDE_DIR
# The $(error ...) line below must not be indented.
$(error INCLUDE_DIR is undefined)
endif

# Both C++ objects end up in libheap.so, so both are compiled with -fPIC.
# The output directory is created first because it may not exist before the
# first cargo build.
target/cpp_to_rust.o: cpp/cpp_to_rust.cpp cpp/c_struct.h $(INCLUDE_DIR)/test.h
	@mkdir -p $(@D)
	g++ -fPIC -I $(INCLUDE_DIR) cpp/cpp_to_rust.cpp -c -o $@

target/rust_to_cpp.o: cpp/rust_to_cpp.cpp cpp/c_struct.h $(INCLUDE_DIR)/test.h
	@mkdir -p $(@D)
	g++ -fPIC -I $(INCLUDE_DIR) cpp/rust_to_cpp.cpp -c -o $@

# Rebuild the Rust static library whenever any source or manifest file changes.
target/debug/libheap.a: $(shell find src -type f) Cargo.toml Cargo.lock
	cargo build

# Partially link the two C++ objects and the Rust archive into one relocatable object.
target/debug/libheap.o: target/cpp_to_rust.o target/rust_to_cpp.o target/debug/libheap.a
	ld -r -o $@ --whole-archive target/rust_to_cpp.o --no-whole-archive target/cpp_to_rust.o target/debug/libheap.a

obj_debug: target/debug/libheap.o

# Depend on the real object file rather than the phony obj_debug alias:
# $^ expands to the prerequisite names, and "obj_debug" is not a file that
# gcc could link.
target/debug/libheap.so: target/debug/libheap.o
	gcc -shared $^ -ldl -o $@

shared_lib_debug: target/debug/libheap.so

.PHONY: obj_debug shared_lib_debug

其中ld的-r选项表示relocatable,即输出一个可重定位的目标文件。按照我的理解,由于要将多个.o文件合并起来,所以不可避免地要进行relocation。

--whole-archive:将后面的文件里的所有符号都加入到目标文件中。--no-whole-archive是取消前面的--whole-archive的影响,使得后面的文件里只有用到了的符号才加入到目标文件。这里rust_to_cpp.o里存储了需要给使用端用的C++接口,因此里面的符号应该全部加入到目标文件中。而其他的选择性加入即可。需要注意的是,严格来说--whole-archive只对静态库(.a)生效:.o文件本身总是被完整链接的,而静态库默认只会取出其中被引用到的目标文件。

然后make obj_debug INCLUDE_DIR=..即可编译出target/debug/libheap.o,里面有我们需要的C++接口。make shared_lib_debug INCLUDE_DIR=..即可编译出动态库target/debug/libheap.so

参考:

https://stackoverflow.com/questions/3821916/how-to-merge-two-ar-static-libraries-into-one

https://stackoverflow.com/questions/29391965/what-is-partial-linking-in-gnu-linker

https://stackoverflow.com/questions/14289513/makefile-rule-that-depends-on-all-files-under-a-directory-including-within-subd

https://stackoverflow.com/questions/4728810/how-to-ensure-makefile-variable-is-set-as-a-prerequisite

缩进后的语句一律视为编译目标的一部分,作为shell语句解释。https://anclark.github.io/2021/02/09/Programming_Tips/Makefile_%E8%B8%A9%E5%9D%91%E8%AE%B0/

https://stackoverflow.com/questions/2826029/passing-additional-variables-from-command-line-to-make

`-ldl`:[编译错误 undefined reference to `dlsym'](https://blog.csdn.net/shareyao/article/details/5362642)。不过这里好像不加`-ldl`也不会报这个错误。

使用

用obj文件:

gcc -I heap/include/ -I. heap/target/debug/libheap.o test.cpp -o test
./test

用动态库:

# -lheap is listed after test.cpp: the linker resolves symbols in command-line
# order, and toolchains that default to --as-needed drop a shared library that
# appears before the object that uses it.
gcc -I heap/include -I. -L heap/target/debug/ test.cpp -lheap -o test
LD_LIBRARY_PATH=heap/target/debug/ ./test

输出:

2333
332
233

注意事项

由于Rust编译出来的目标文件里会自带很多符号,因此本博客提供的方法无法实现C++代码调用多个Rust工程提供的接口,需要将所有Rust接口的实现都塞进一个Rust工程。暂时不清楚解决方案。