eee

// =============================
// SemanticChunkUtil.java
// =============================

import java.util.*;
import java.util.regex.*;

/**
 * Rule-based text splitter that cuts raw text into chunks no longer than
 * {@code MAX_LENGTH} characters.
 *
 * <p>Text is first split into paragraphs on line breaks. A paragraph that fits
 * within the limit becomes one chunk; a longer paragraph is split after each
 * period and the sentences are packed greedily into chunks.
 */
public class SemanticChunkUtil {

    // Maximum chunk length (guards against token blow-up downstream)
    private static final int MAX_LENGTH = 800;

    // Compiled once: String.split would recompile these on every call.
    private static final Pattern PARAGRAPH_BREAK = Pattern.compile("[\\r\\n]+");
    private static final Pattern SENTENCE_BOUNDARY = Pattern.compile("(?<=\\.)");

    /**
     * Splits {@code text} into trimmed, non-empty chunks of at most
     * {@code MAX_LENGTH} characters.
     *
     * @param text the text to split; {@code null} is treated as empty
     * @return the chunks in document order; empty when there is nothing to split
     */
    public static List<String> splitByRule(String text) {

        List<String> result = new ArrayList<>();
        if (text == null) {
            return result;
        }

        // Pass 1: split on paragraph breaks
        for (String para : PARAGRAPH_BREAK.split(text)) {

            String trimmed = para.trim();
            if (trimmed.isEmpty()) {
                continue;
            }

            if (trimmed.length() <= MAX_LENGTH) {
                // Short enough: use the paragraph as-is
                result.add(trimmed);
            } else {
                // Pass 2: split on sentence boundaries
                splitLongParagraph(trimmed, result);
            }
        }

        return result;
    }

    // Packs the sentences of an over-long paragraph into chunks within the limit.
    private static void splitLongParagraph(String paragraph, List<String> out) {

        StringBuilder buffer = new StringBuilder();

        for (String sentence : SENTENCE_BOUNDARY.split(paragraph)) {

            // Flush only a non-empty buffer; flushing an empty one would emit
            // an empty chunk whenever the first sentence is already too long.
            if (buffer.length() > 0 && buffer.length() + sentence.length() > MAX_LENGTH) {
                addIfNotBlank(buffer.toString(), out);
                buffer.setLength(0);
            }

            int start = 0;
            // A single sentence over the limit (the buffer is empty at this
            // point) is cut into fixed-size pieces so no chunk breaks the limit.
            while (sentence.length() - start > MAX_LENGTH) {
                int end = start + MAX_LENGTH;
                if (Character.isHighSurrogate(sentence.charAt(end - 1))) {
                    end--; // do not cut a surrogate pair in half
                }
                addIfNotBlank(sentence.substring(start, end), out);
                start = end;
            }

            // The remainder (or the whole sentence) is packed with what follows.
            buffer.append(sentence, start, sentence.length());
        }

        addIfNotBlank(buffer.toString(), out);
    }

    // Adds the trimmed chunk unless trimming leaves nothing.
    private static void addIfNotBlank(String chunk, List<String> out) {
        String trimmed = chunk.trim();
        if (!trimmed.isEmpty()) {
            out.add(trimmed);
        }
    }

}

// =============================
// FileChunkWriter.java
// =============================

import java.nio.file.*;
import java.io.IOException;
import java.util.List;

/**
 * Persists text chunks as individual files on disk.
 */
public class FileChunkWriter {

    /**
     * Writes each chunk to its own file named {@code chunk_<index>.txt} inside
     * {@code dirPath}, creating the directory (and any missing parents) first
     * when it does not exist yet.
     *
     * @param chunks  the chunks to store; the list position becomes the file index
     * @param dirPath the directory that receives the chunk files
     * @throws IOException if the directory cannot be created or a file cannot be written
     */
    public static void saveChunks(List<String> chunks, String dirPath) throws IOException {

        final Path targetDir = Paths.get(dirPath);

        final boolean alreadyPresent = Files.exists(targetDir);
        if (!alreadyPresent) {
            Files.createDirectories(targetDir);
        }

        int index = 0;
        for (String chunk : chunks) {
            final String fileName = "chunk_" + index + ".txt";
            Files.writeString(targetDir.resolve(fileName), chunk);
            index++;
        }
    }

}

// =============================
// FileChunkReader.java
// =============================

import java.nio.file.*; import java.io.IOException; import java.util.*;

/**
 * Loads chunk files back from disk.
 */
public class FileChunkReader {

    /**
     * Reads every regular file directly inside {@code dirPath} and returns the
     * file contents.
     *
     * <p>Files are ordered by name with digit runs compared numerically, so
     * {@code chunk_2.txt} comes before {@code chunk_10.txt}. A plain
     * lexicographic sort would return them the other way round.
     *
     * @param dirPath the directory to read from
     * @return the contents of the files, in natural file-name order
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    public static List<String> readChunks(String dirPath) throws IOException {

        List<Path> files = new ArrayList<>();

        // Files.list keeps a directory handle open until the stream is closed.
        try (java.util.stream.Stream<Path> entries = Files.list(Paths.get(dirPath))) {
            // Sub-directories are skipped; Files.readString cannot read them.
            entries.filter(Files::isRegularFile).forEach(files::add);
        } catch (java.io.UncheckedIOException e) {
            // Directory iteration reports I/O failures unchecked; surface them
            // as the IOException this method declares.
            throw e.getCause();
        }

        files.sort(FileChunkReader::compareByNaturalName);

        List<String> result = new ArrayList<>(files.size());
        for (Path file : files) {
            result.add(Files.readString(file));
        }
        return result;
    }

    // Orders paths by file name in natural order, falling back to Path order on ties.
    private static int compareByNaturalName(Path left, Path right) {
        int byName = compareNatural(left.getFileName().toString(), right.getFileName().toString());
        return byName != 0 ? byName : left.compareTo(right);
    }

    // Compares two strings character by character, treating digit runs as numbers.
    private static int compareNatural(String a, String b) {
        int i = 0;
        int j = 0;
        while (i < a.length() && j < b.length()) {
            char ca = a.charAt(i);
            char cb = b.charAt(j);
            if (isAsciiDigit(ca) && isAsciiDigit(cb)) {
                int endA = digitRunEnd(a, i);
                int endB = digitRunEnd(b, j);
                int cmp = compareDigitRuns(a.substring(i, endA), b.substring(j, endB));
                if (cmp != 0) {
                    return cmp;
                }
                i = endA;
                j = endB;
            } else {
                if (ca != cb) {
                    return Character.compare(ca, cb);
                }
                i++;
                j++;
            }
        }
        // Whichever string still has characters left sorts last.
        return Integer.compare(a.length() - i, b.length() - j);
    }

    // Returns the index just past the digit run that starts at 'from'.
    private static int digitRunEnd(String s, int from) {
        int end = from;
        while (end < s.length() && isAsciiDigit(s.charAt(end))) {
            end++;
        }
        return end;
    }

    // Compares two digit strings by numeric value without parsing (no overflow).
    private static int compareDigitRuns(String x, String y) {
        String nx = stripLeadingZeros(x);
        String ny = stripLeadingZeros(y);
        if (nx.length() != ny.length()) {
            return Integer.compare(nx.length(), ny.length());
        }
        return nx.compareTo(ny);
    }

    // Drops leading zeros so that equal-length digit strings compare by value.
    private static String stripLeadingZeros(String digits) {
        int k = 0;
        while (k < digits.length() - 1 && digits.charAt(k) == '0') {
            k++;
        }
        return digits.substring(k);
    }

    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

}

// =============================
// SemanticChunkProcessor.java
// =============================

/**
 * Sends a single chunk to the LLM client together with a fixed system prompt.
 */
public class SemanticChunkProcessor {

    // System prompt passed unchanged with every chunk.
    private static final String SYSTEM_PROMPT = """
            당신은 semantic chunk 전문가입니다.
            입력 문장을 의미 단위로 나누고 JSON으로 반환하세요.
            형식:
            {
              "count": "개수",
              "contents": ["문장1", "문장2"]
            }
            """;

    private final FabrxClient client;

    /**
     * @param fabrxClient the client used to call the LLM
     */
    public SemanticChunkProcessor(FabrxClient fabrxClient) {
        this.client = fabrxClient;
    }

    /**
     * Calls the LLM for one chunk.
     *
     * @param chunk the chunk text, passed to the client as the second argument
     * @return whatever the client call returns
     */
    public String processChunk(String chunk) {
        return client.call(SYSTEM_PROMPT, chunk);
    }

}

// =============================
// ResultWriter.java
// =============================

import java.nio.file.*;
import java.io.IOException;

/**
 * Stores LLM result JSON on disk.
 */
public class ResultWriter {

    /**
     * Writes {@code resultJson} to {@code result_<index>.json} inside
     * {@code dirPath}, creating the directory (and any missing parents) first
     * when it does not exist yet.
     *
     * @param dirPath    the directory that receives the result file
     * @param index      the number used in the file name
     * @param resultJson the text to write
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    public static void saveResult(String dirPath, int index, String resultJson) throws IOException {

        final Path outputDir = Paths.get(dirPath);

        final boolean alreadyPresent = Files.exists(outputDir);
        if (!alreadyPresent) {
            Files.createDirectories(outputDir);
        }

        final String fileName = "result_" + index + ".json";
        Files.writeString(outputDir.resolve(fileName), resultJson);
    }

}

// =============================
// ChunkPipelineService.java
// =============================

import java.util.*;
import java.io.IOException;

/**
 * Runs the whole chunking pipeline: rule-based split, save chunks to disk,
 * read them back, send each chunk to the LLM, and save each result.
 */
public class ChunkPipelineService {

    // Working directory used when the caller does not supply one.
    private static final String DEFAULT_BASE_DIR = "C:/chunk_work";

    private final SemanticChunkProcessor processor;
    private final String baseDir;

    /**
     * Creates a pipeline that works under the default directory
     * {@code C:/chunk_work}.
     *
     * @param processor the processor that handles each chunk
     */
    public ChunkPipelineService(SemanticChunkProcessor processor) {
        this(processor, DEFAULT_BASE_DIR);
    }

    /**
     * Creates a pipeline that works under a caller-chosen directory, so the
     * service is not tied to a Windows drive path.
     *
     * @param processor the processor that handles each chunk
     * @param baseDir   the working directory; chunks go to {@code <baseDir>/input}
     *                  and results to {@code <baseDir>/output}
     * @throws NullPointerException if {@code baseDir} is {@code null}
     */
    public ChunkPipelineService(SemanticChunkProcessor processor, String baseDir) {
        this.processor = processor;
        this.baseDir = Objects.requireNonNull(baseDir, "baseDir");
    }

    /**
     * Executes the full pipeline for one text.
     *
     * @param originalText the text to split and process
     * @throws IOException if writing or reading the chunk or result files fails
     */
    public void execute(String originalText) throws IOException {

        String inputDir = baseDir + "/input";
        String outputDir = baseDir + "/output";

        // 1. Rule-based split
        List<String> chunks = SemanticChunkUtil.splitByRule(originalText);

        // 2. Save chunks to files
        FileChunkWriter.saveChunks(chunks, inputDir);

        // 3. Read the chunk files back
        // NOTE(review): the reader lists the whole input directory, so files left
        // there by an earlier run are processed again — confirm this is intended.
        List<String> readChunks = FileChunkReader.readChunks(inputDir);

        // 4. Call the LLM once per chunk
        for (int i = 0; i < readChunks.size(); i++) {

            String result = processor.processChunk(readChunks.get(i));

            // 5. Save the result under the same index
            ResultWriter.saveResult(outputDir, i, result);
        }
    }

}

AstroScent

Leave a Reply Cancel reply

Comments

Archives

Categories