Ray Tune metric description
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -199,7 +199,7 @@ def main():
|
|||||||
parser.add_argument("--text-det-unclip-ratio", type=float, default=1.5)
|
parser.add_argument("--text-det-unclip-ratio", type=float, default=1.5)
|
||||||
parser.add_argument("--text-rec-score-thresh", type=float, default=0.0)
|
parser.add_argument("--text-rec-score-thresh", type=float, default=0.0)
|
||||||
parser.add_argument("--line-tolerance", type=float, default=0.6)
|
parser.add_argument("--line-tolerance", type=float, default=0.6)
|
||||||
parser.add_argument("--min-box-score", type=int, default=0)
|
parser.add_argument("--min-box-score", type=float, default=0.0)
|
||||||
parser.add_argument("--pages-per-pdf", type=int, default=2)
|
parser.add_argument("--pages-per-pdf", type=int, default=2)
|
||||||
parser.add_argument("--lang", default="es")
|
parser.add_argument("--lang", default="es")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -213,6 +213,7 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
cer_list, wer_list = [], []
|
cer_list, wer_list = [], []
|
||||||
|
time_per_page_list = []
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
for fname in os.listdir(args.pdf_folder):
|
for fname in os.listdir(args.pdf_folder):
|
||||||
@@ -223,6 +224,7 @@ def main():
|
|||||||
for i, img in enumerate(images):
|
for i, img in enumerate(images):
|
||||||
ref = pdf_extract_text(pdf_path, i+1)
|
ref = pdf_extract_text(pdf_path, i+1)
|
||||||
arr = np.array(img)
|
arr = np.array(img)
|
||||||
|
tp0 = time.time()
|
||||||
out = ocr.predict(
|
out = ocr.predict(
|
||||||
arr,
|
arr,
|
||||||
text_det_box_thresh=args.text_det_box_thresh,
|
text_det_box_thresh=args.text_det_box_thresh,
|
||||||
@@ -231,6 +233,7 @@ def main():
|
|||||||
use_textline_orientation=args.textline_orientation
|
use_textline_orientation=args.textline_orientation
|
||||||
)
|
)
|
||||||
pred = assemble_from_paddle_result(out, args.min_box_score, args.line_tolerance)
|
pred = assemble_from_paddle_result(out, args.min_box_score, args.line_tolerance)
|
||||||
|
time_per_page_list.append(float(time.time() - tp0))
|
||||||
m = evaluate_text(ref, pred)
|
m = evaluate_text(ref, pred)
|
||||||
cer_list.append(m["CER"])
|
cer_list.append(m["CER"])
|
||||||
wer_list.append(m["WER"])
|
wer_list.append(m["WER"])
|
||||||
@@ -238,8 +241,9 @@ def main():
|
|||||||
metrics = {
|
metrics = {
|
||||||
"CER": float(np.mean(cer_list) if cer_list else 1.0),
|
"CER": float(np.mean(cer_list) if cer_list else 1.0),
|
||||||
"WER": float(np.mean(wer_list) if wer_list else 1.0),
|
"WER": float(np.mean(wer_list) if wer_list else 1.0),
|
||||||
"time": float(time.time() - t0),
|
"TIME": float(time.time() - t0),
|
||||||
"pages": int(len(cer_list)),
|
"PAGES": int(len(cer_list)),
|
||||||
|
"TIME_PER_PAGE": float(np.mean(time_per_page_list) if time_per_page_list else float(time.time() - t0)),
|
||||||
}
|
}
|
||||||
print(json.dumps(metrics))
|
print(json.dumps(metrics))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user