03/03/2026
// Streams the document token by token and writes one "$$"-delimited line
// (page_number$$id$$type$$text) for every element found under a "pages" array.
// Values that are not consumed here are skipped as a whole with skipChildren(),
// so a nested object or array inside a page or an element can neither end the
// surrounding loop early nor have its own "id"/"type"/"text" fields picked up.
// Note: an element is only written when "page_number" was read before the
// "elements" array of the same page; otherwise currentPage is still null.
while (parser.nextToken() != null) {
    if (parser.currentToken() != JsonToken.FIELD_NAME
            || !"pages".equals(parser.currentName())) {
        continue;
    }
    if (parser.nextToken() != JsonToken.START_ARRAY) {
        // "pages" is present but not an array: step over its value.
        parser.skipChildren();
        continue;
    }
    while (parser.nextToken() != JsonToken.END_ARRAY) {
        if (parser.currentToken() != JsonToken.START_OBJECT) {
            // Array entry that is not a page object.
            parser.skipChildren();
            continue;
        }
        Integer currentPage = null;
        while (parser.nextToken() != JsonToken.END_OBJECT) {
            if (parser.currentToken() != JsonToken.FIELD_NAME) {
                continue;
            }
            String pageField = parser.currentName();
            parser.nextToken(); // move to the field's value
            if ("page_number".equals(pageField)) {
                currentPage = parser.getIntValue();
                continue;
            }
            if (!"elements".equals(pageField)
                    || parser.currentToken() != JsonToken.START_ARRAY) {
                // Unrelated field, or "elements" that is not an array.
                parser.skipChildren(); // no-op for scalar values
                continue;
            }
            while (parser.nextToken() != JsonToken.END_ARRAY) {
                if (parser.currentToken() != JsonToken.START_OBJECT) {
                    // Array entry that is not an element object.
                    parser.skipChildren();
                    continue;
                }
                Integer id = null;
                String type = null;
                String text = null;
                while (parser.nextToken() != JsonToken.END_OBJECT) {
                    if (parser.currentToken() != JsonToken.FIELD_NAME) {
                        continue;
                    }
                    String field = parser.currentName();
                    parser.nextToken(); // move to the field's value
                    if ("id".equals(field)) {
                        id = parser.getIntValue();
                    } else if ("type".equals(field)) {
                        type = parser.getValueAsString();
                    } else if ("text".equals(field)) {
                        text = parser.getValueAsString();
                    }
                    // Step over any object/array value so its END token is not
                    // mistaken for the end of this element; no-op for scalars.
                    parser.skipChildren();
                }
                if (text != null && currentPage != null && id != null) {
                    writer.write(currentPage + "$$" + id + "$$" + type + "$$" + text);
                    writer.newLine();
                }
            }
        }
    }
}
// Tree-model variant: loads the whole document and writes one "$$"-delimited
// line (page_number$$id$$type$$text) per element. Pages without a
// "page_number" or an "elements" array, and elements without an "id" or a
// "text", are skipped instead of failing with a NullPointerException; a missing
// "type" is written as "-". hasNonNull() is used so an explicit JSON null is
// treated like an absent field rather than being rendered as "null" or 0.
ObjectMapper mapper = new ObjectMapper();
JsonNode root = mapper.readTree(jsonFile);
JsonNode pagesNode = (root == null) ? null : root.get("pages");
if (pagesNode != null && pagesNode.isArray()) {
    for (JsonNode page : pagesNode) {
        if (!page.hasNonNull("page_number")) {
            continue;
        }
        int pageNumber = page.get("page_number").asInt();
        JsonNode elementsNode = page.get("elements");
        if (elementsNode == null || !elementsNode.isArray()) {
            continue;
        }
        for (JsonNode el : elementsNode) {
            if (!el.hasNonNull("id") || !el.hasNonNull("text")) {
                continue;
            }
            int id = el.get("id").asInt();
            String type = el.hasNonNull("type") ? el.get("type").asText() : "-";
            String text = el.get("text").asText();
            writer.write(pageNumber + "$$" + id + "$$" + type + "$$" + text);
            writer.newLine();
        }
    }
}
/**
 * Converts one JSON document into a "$$"-delimited text file in {@code txtDir}.
 *
 * <p>The output file is named after the input: a trailing {@code .json}
 * extension (any letter case) is replaced by {@code .csv}; a name without that
 * extension gets {@code .csv} appended, so the output name never equals the
 * input name. The first line is the header
 * {@code page_number$$element_id$$type$$text}; each following line is one
 * element of one page.
 *
 * <p>Pages lacking a {@code page_number} or an {@code elements} array are
 * skipped, as are elements lacking an {@code id} or having blank {@code text}.
 * A missing {@code type} is written as {@code "-"}. An explicit JSON
 * {@code null} is treated like an absent field. Line breaks inside the text are
 * replaced by spaces so every element stays on a single line.
 *
 * <p>The JSON is parsed before the output file is opened, so a read or parse
 * failure does not create or truncate the output.
 *
 * @param jsonFile input JSON file
 * @param txtDir   directory the output file is written to
 * @param mapper   Jackson mapper used to parse the input
 * @throws IOException if the input cannot be read or the output cannot be written
 */
private static void convertSingleJson(Path jsonFile,
                                      Path txtDir,
                                      ObjectMapper mapper) throws IOException {
    // Build the output file name: swap only a trailing ".json", never an
    // occurrence in the middle of the name.
    String sourceName = jsonFile.getFileName().toString();
    String jsonExt = ".json";
    boolean endsWithJson = sourceName.regionMatches(
            true, sourceName.length() - jsonExt.length(), jsonExt, 0, jsonExt.length());
    String baseName = endsWithJson
            ? sourceName.substring(0, sourceName.length() - jsonExt.length())
            : sourceName;
    Path outputFile = txtDir.resolve(baseName + ".csv");

    // Read the JSON first. An InputStream is used because readTree(InputStream)
    // is available across Jackson releases.
    // NOTE(review): a Path overload does not appear to exist on the 2.x
    // ObjectMapper - confirm against the Jackson version in use.
    JsonNode root;
    try (var in = Files.newInputStream(jsonFile)) {
        root = mapper.readTree(in);
    }

    try (BufferedWriter writer = Files.newBufferedWriter(
            outputFile,
            StandardOpenOption.CREATE,
            StandardOpenOption.TRUNCATE_EXISTING)) {

        // Header
        writer.write("page_number$$element_id$$type$$text");
        writer.newLine();

        JsonNode pages = (root == null) ? null : root.get("pages");
        if (pages == null || !pages.isArray()) {
            return; // no pages: leave a header-only file
        }

        for (JsonNode page : pages) {
            if (!page.hasNonNull("page_number")) {
                continue;
            }
            int pageNumber = page.get("page_number").asInt();

            JsonNode elements = page.get("elements");
            if (elements == null || !elements.isArray()) {
                continue;
            }

            for (JsonNode element : elements) {
                if (!element.hasNonNull("id") || !element.hasNonNull("text")) {
                    continue;
                }
                String text = element.get("text").asText();
                if (text.isBlank()) {
                    continue;
                }
                int id = element.get("id").asInt();
                String type = element.hasNonNull("type")
                        ? element.get("type").asText()
                        : "-";

                // Collapse line breaks so the record stays on one line.
                text = text.replace("\r", " ")
                        .replace("\n", " ")
                        .trim();

                writer.write(pageNumber + "$$"
                        + id + "$$"
                        + type + "$$"
                        + text);
                writer.newLine();
            }
        }
    }
}